diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index c7c98eadaf67bc..523956eb8c738d 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -1812,3 +1812,55 @@ (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) (VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) + +// remove flags → bool → flags roundtrip +// Only do this if the flag-generating instruction is local; otherwise flagalloc is likely to undo this optimization and make things worse. +(NE t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no) +(NE t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no) +(NE t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no) +(NE t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s) yes no) && t.Block == s.Block => ((EQ|NE|LT|GT|LE|GE|UGT|ULT|UGE|ULE|EQF|NEF|UGT|UGE) flags yes no) + +(CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVQNE yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVQNE yes no t:(TESTB 
s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) + +(CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVLNE yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) + +(CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) +(CMOVWNE yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) yes no flags) + +(SETNE t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => s +(SETNE t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == s.Block => s +(SETNE t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags)) x)) && t.Block == 
s.Block => s +(SETNE t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) flags) s)) && t.Block == s.Block => s + +(EQ t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no) +(EQ t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no) +(EQ t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no) +(EQ t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s) yes no) && t.Block == s.Block => ((NE|EQ|GE|LE|GT|LT|ULE|UGE|ULT|UGT) flags yes no) + +(CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVQEQ yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (CMOVQ(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) + +(CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVLEQ yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (CMOVL(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) + +(CMOVWEQ yes 
no t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) +(CMOVWEQ yes no t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (CMOVW(NE|EQ|GE|LE|GT|LT|LS|CC|CS|HI) yes no flags) + +(SETEQ t:(TESTQ x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags) +(SETEQ t:(TESTL x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags) +(SETEQ t:(TESTW x:(MOVBQZX s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags)) x)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags) +(SETEQ t:(TESTB s:(SET(EQ|NE|L|G|LE|GE|A|B|AE|BE) flags) s)) && t.Block == s.Block => (SET(NE|EQ|GE|LE|G|L|BE|AE|B|A) flags) diff --git a/src/cmd/compile/internal/ssa/prove.go b/src/cmd/compile/internal/ssa/prove.go index effbcf1d51d78d..6f2143271115ff 100644 --- a/src/cmd/compile/internal/ssa/prove.go +++ b/src/cmd/compile/internal/ssa/prove.go @@ -2720,7 +2720,7 @@ var invertEqNeqOp = map[Op]Op{ // simplifyBlock simplifies some constant values in b and evaluates // branches to non-uniquely dominated successors of b. 
func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { - for iv, v := range b.Values { + for _, v := range b.Values { switch v.Op { case OpStaticLECall: if b.Func.pass.debug > 0 && len(v.Args) == 2 { @@ -2874,14 +2874,6 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { v.reset(OpCondSelect) v.AddArg3(y, zero, check) - // FIXME: workaround for go.dev/issues/76060 - // we need to schedule the Neq before the CondSelect even tho - // scheduling is meaningless until we reach the schedule pass. - if b.Values[len(b.Values)-1] != check { - panic("unreachable; failed sanity check, new value isn't at the end of the block") - } - b.Values[iv], b.Values[len(b.Values)-1] = b.Values[len(b.Values)-1], b.Values[iv] - if b.Func.pass.debug > 0 { b.Func.Warnl(v.Pos, "Rewrote Mul %v into CondSelect; %v is bool", v, x) } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 6a4b2e979fbc58..81c46781aafc91 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -9485,6 +9485,1246 @@ func rewriteValueAMD64_OpAMD64CMOVLEQ(v *Value) bool { } break } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + 
break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + 
continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block 
== s.Block + // result: (CMOVLEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 
:= t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { 
+ continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTL x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + 
break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) + for { + yes := v_0 + no := 
v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x 
:= t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + 
return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + 
continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = 
t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLEQ yes no t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break + } return false } func rewriteValueAMD64_OpAMD64CMOVLGE(v *Value) bool { @@ -10111,574 +11351,1750 @@ func rewriteValueAMD64_OpAMD64CMOVLNE(v *Value) bool { } break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQCC(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVQCC x y (InvertFlags cond)) - // result: (CMOVQLS x y cond) + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQLS) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCC _ 
x (FlagEQ)) - // result: x + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCC _ x (FlagGT_UGT)) - // result: x + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCC y _ (FlagGT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + 
flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCC y _ (FlagLT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCC _ x (FlagLT_UGT)) - // result: x + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQCS(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVQCS x y (InvertFlags cond)) - // result: (CMOVQHI x y cond) + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op 
!= OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQHI) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCS y _ (FlagEQ)) - // result: y + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCS y _ (FlagGT_UGT)) - // result: y + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + 
return true + } + break } - // match: (CMOVQCS _ x (FlagGT_ULT)) - // result: x + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCS _ x (FlagLT_ULT)) - // result: x + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQF yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQCS y _ (FlagLT_UGT)) - // result: y + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNEF yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue 
+ } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNEF) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQEQ(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CMOVQEQ x y (InvertFlags cond)) - // result: (CMOVQEQ x y cond) + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGTF yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQEQ) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGTF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQEQ _ x (FlagEQ)) - // result: x + // match: (CMOVLNE yes no t:(TESTQ x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGEF yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGEF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQEQ y _ (FlagGT_UGT)) - // result: y + // match: (CMOVLNE 
yes no t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQEQ y _ (FlagGT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQEQ y _ (FlagLT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || 
!(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQEQ y _ (FlagLT_UGT)) - // result: y + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) - // cond: c != 0 - // result: x + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) for { - x := v_0 - if v_2.Op != OpSelect1 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v_2_0 := v_2.Args[0] - if v_2_0.Op != OpAMD64BSFQ { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true } - v_2_0_0 := v_2_0.Args[0] - if v_2_0_0.Op != OpAMD64ORQconst { + break + } + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt32(v_2_0_0.AuxInt) - if !(c != 0) { - break + _ = 
t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true } - v.copyOf(x) - return true + break } - // match: (CMOVQEQ x _ (Select1 (BSRQ (ORQconst [c] _)))) - // cond: c != 0 - // result: x + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) for { - x := v_0 - if v_2.Op != OpSelect1 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v_2_0 := v_2.Args[0] - if v_2_0.Op != OpAMD64BSRQ { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true } - v_2_0_0 := v_2_0.Args[0] - if v_2_0_0.Op != OpAMD64ORQconst { + break + } + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt32(v_2_0_0.AuxInt) - if !(c != 0) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true } - v.copyOf(x) - return true + break } - // match: (CMOVQEQ x y (TESTQ s:(Select0 
blsr:(BLSRQ _)) s)) - // result: (CMOVQEQ x y (Select1 blsr)) + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64TESTQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - _ = v_2.Args[1] - v_2_0 := v_2.Args[0] - v_2_1 := v_2.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 { - s := v_2_0 - if s.Op != OpSelect0 { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRQ || s != v_2_1 { + s := x.Args[0] + if s.Op != OpAMD64SETAE { continue } - v.reset(OpAMD64CMOVQEQ) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - v.AddArg3(x, y, v0) + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) return true } break } - // match: (CMOVQEQ x y (TESTL s:(Select0 blsr:(BLSRL _)) s)) - // result: (CMOVQEQ x y (Select1 blsr)) + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64TESTL { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - _ = v_2.Args[1] - v_2_0 := v_2.Args[0] - v_2_1 := v_2.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 { - s := v_2_0 - if s.Op != OpSelect0 { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRL || s != v_2_1 { + s := x.Args[0] + if s.Op != OpAMD64SETBE { continue } - v.reset(OpAMD64CMOVQEQ) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - v.AddArg3(x, 
y, v0) + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQGE(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CMOVQGE x y (InvertFlags cond)) - // result: (CMOVQLE x y cond) + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQF yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQLE) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE _ x (FlagEQ)) - // result: x + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNEF yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNEF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE _ x (FlagGT_UGT)) - // result: x + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETGF flags)) x)) + // cond: 
t.Block == s.Block + // result: (CMOVLGTF yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGTF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE _ x (FlagGT_ULT)) - // result: x + // match: (CMOVLNE yes no t:(TESTL x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGEF yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGEF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE y _ (FlagLT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE y _ (FlagLT_UGT)) - // result: y + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE x y c:(CMPQconst [128] z)) - // cond: c.Uses == 1 - // result: (CMOVQGT x y (CMPQconst [127] z)) + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) for { - x := v_0 - y := v_1 - c := v_2 - if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - z := c.Args[0] - if !(c.Uses == 1) { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.reset(OpAMD64CMOVQGT) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = 
int32ToAuxInt(127) - v0.AddArg(z) - v.AddArg3(x, y, v0) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGE x y c:(CMPLconst [128] z)) - // cond: c.Uses == 1 - // result: (CMOVQGT x y (CMPLconst [127] z)) + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) for { - x := v_0 - y := v_1 - c := v_2 - if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - z := c.Args[0] - if !(c.Uses == 1) { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.reset(OpAMD64CMOVQGT) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(z) - v.AddArg3(x, y, v0) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + 
continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQGT(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVQGT x y (InvertFlags cond)) - // result: (CMOVQLT x y cond) + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQLT) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGT y _ (FlagEQ)) - // result: y + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGT _ x (FlagGT_UGT)) - // result: x + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) for { - x := v_1 - if v_2.Op != 
OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGT _ x (FlagGT_ULT)) - // result: x + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGT y _ (FlagLT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLEQF yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQGT y _ 
(FlagLT_UGT)) - // result: y + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLNEF yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNEF) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQHI(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVQHI x y (InvertFlags cond)) - // result: (CMOVQCS x y cond) + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGTF yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQCS) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGTF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQHI y _ (FlagEQ)) - // result: y + // match: (CMOVLNE yes no t:(TESTW x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVLGEF yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true 
+ _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGEF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQHI _ x (FlagGT_UGT)) - // result: x + // match: (CMOVLNE yes no t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLEQ yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQHI y _ (FlagGT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLNE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQHI y _ (FlagLT_ULT)) - // result: y + // match: (CMOVLNE yes no t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLLT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { 
break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVQHI _ x (FlagLT_UGT)) - // result: x + // match: (CMOVLNE yes no t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLGT yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + 
continue + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLLS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, 
t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETEQF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLEQF yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLEQF) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETNEF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLNEF yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLNEF) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETGF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLGTF yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGTF) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVLNE yes no t:(TESTB s:(SETGEF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVLGEF yes no flags) + for { + yes := 
v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVLGEF) + v.AddArg3(yes, no, flags) + return true + } + break } return false } -func rewriteValueAMD64_OpAMD64CMOVQLE(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQCC(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVQLE x y (InvertFlags cond)) - // result: (CMOVQGE x y cond) + // match: (CMOVQCC x y (InvertFlags cond)) + // result: (CMOVQLS x y cond) for { x := v_0 y := v_1 @@ -10686,11 +13102,11 @@ func rewriteValueAMD64_OpAMD64CMOVQLE(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVQGE) + v.reset(OpAMD64CMOVQLS) v.AddArg3(x, y, cond) return true } - // match: (CMOVQLE _ x (FlagEQ)) + // match: (CMOVQCC _ x (FlagEQ)) // result: x for { x := v_1 @@ -10700,17 +13116,17 @@ func rewriteValueAMD64_OpAMD64CMOVQLE(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVQLE y _ (FlagGT_UGT)) - // result: y + // match: (CMOVQCC _ x (FlagGT_UGT)) + // result: x for { - y := v_0 + x := v_1 if v_2.Op != OpAMD64FlagGT_UGT { break } - v.copyOf(y) + v.copyOf(x) return true } - // match: (CMOVQLE y _ (FlagGT_ULT)) + // match: (CMOVQCC y _ (FlagGT_ULT)) // result: y for { y := v_0 @@ -10720,17 +13136,17 @@ func rewriteValueAMD64_OpAMD64CMOVQLE(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVQLE _ x (FlagLT_ULT)) - // result: x + // match: (CMOVQCC y _ (FlagLT_ULT)) + // result: y for { - x := v_1 + y := v_0 if v_2.Op != OpAMD64FlagLT_ULT { break } - v.copyOf(x) + v.copyOf(y) return true } - // match: (CMOVQLE _ x (FlagLT_UGT)) + // match: (CMOVQCC _ x (FlagLT_UGT)) // result: x for { x := v_1 @@ -10742,12 +13158,12 @@ func rewriteValueAMD64_OpAMD64CMOVQLE(v *Value) bool { } 
return false } -func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQCS(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVQLS x y (InvertFlags cond)) - // result: (CMOVQCC x y cond) + // match: (CMOVQCS x y (InvertFlags cond)) + // result: (CMOVQHI x y cond) for { x := v_0 y := v_1 @@ -10755,21 +13171,21 @@ func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVQCC) + v.reset(OpAMD64CMOVQHI) v.AddArg3(x, y, cond) return true } - // match: (CMOVQLS _ x (FlagEQ)) - // result: x + // match: (CMOVQCS y _ (FlagEQ)) + // result: y for { - x := v_1 + y := v_0 if v_2.Op != OpAMD64FlagEQ { break } - v.copyOf(x) + v.copyOf(y) return true } - // match: (CMOVQLS y _ (FlagGT_UGT)) + // match: (CMOVQCS y _ (FlagGT_UGT)) // result: y for { y := v_0 @@ -10779,7 +13195,7 @@ func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVQLS _ x (FlagGT_ULT)) + // match: (CMOVQCS _ x (FlagGT_ULT)) // result: x for { x := v_1 @@ -10789,7 +13205,7 @@ func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVQLS _ x (FlagLT_ULT)) + // match: (CMOVQCS _ x (FlagLT_ULT)) // result: x for { x := v_1 @@ -10799,7 +13215,7 @@ func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVQLS y _ (FlagLT_UGT)) + // match: (CMOVQCS y _ (FlagLT_UGT)) // result: y for { y := v_0 @@ -10811,13 +13227,13 @@ func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64CMOVQLT(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQEQ(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (CMOVQLT x y (InvertFlags cond)) - // result: (CMOVQGT x y cond) + // match: (CMOVQEQ x y (InvertFlags cond)) + // result: (CMOVQEQ x y cond) for { x := v_0 y := v_1 @@ -10825,21 +13241,21 @@ func 
rewriteValueAMD64_OpAMD64CMOVQLT(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVQGT) + v.reset(OpAMD64CMOVQEQ) v.AddArg3(x, y, cond) return true } - // match: (CMOVQLT y _ (FlagEQ)) - // result: y + // match: (CMOVQEQ _ x (FlagEQ)) + // result: x for { - y := v_0 + x := v_1 if v_2.Op != OpAMD64FlagEQ { break } - v.copyOf(y) + v.copyOf(x) return true } - // match: (CMOVQLT y _ (FlagGT_UGT)) + // match: (CMOVQEQ y _ (FlagGT_UGT)) // result: y for { y := v_0 @@ -10849,7 +13265,7 @@ func rewriteValueAMD64_OpAMD64CMOVQLT(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVQLT y _ (FlagGT_ULT)) + // match: (CMOVQEQ y _ (FlagGT_ULT)) // result: y for { y := v_0 @@ -10859,140 +13275,74 @@ func rewriteValueAMD64_OpAMD64CMOVQLT(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVQLT _ x (FlagLT_ULT)) - // result: x + // match: (CMOVQEQ y _ (FlagLT_ULT)) + // result: y for { - x := v_1 + y := v_0 if v_2.Op != OpAMD64FlagLT_ULT { break } - v.copyOf(x) + v.copyOf(y) return true } - // match: (CMOVQLT _ x (FlagLT_UGT)) - // result: x + // match: (CMOVQEQ y _ (FlagLT_UGT)) + // result: y for { - x := v_1 + y := v_0 if v_2.Op != OpAMD64FlagLT_UGT { break } - v.copyOf(x) + v.copyOf(y) return true } - // match: (CMOVQLT x y c:(CMPQconst [128] z)) - // cond: c.Uses == 1 - // result: (CMOVQLE x y (CMPQconst [127] z)) + // match: (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) + // cond: c != 0 + // result: x for { x := v_0 - y := v_1 - c := v_2 - if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { + if v_2.Op != OpSelect1 { break } - z := c.Args[0] - if !(c.Uses == 1) { + v_2_0 := v_2.Args[0] + if v_2_0.Op != OpAMD64BSFQ { break } - v.reset(OpAMD64CMOVQLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(z) - v.AddArg3(x, y, v0) - return true - } - // match: (CMOVQLT x y c:(CMPLconst [128] z)) - // cond: c.Uses == 1 - // result: (CMOVQLE x y (CMPLconst [127] z)) - for { - x := v_0 
- y := v_1 - c := v_2 - if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { + v_2_0_0 := v_2_0.Args[0] + if v_2_0_0.Op != OpAMD64ORQconst { break } - z := c.Args[0] - if !(c.Uses == 1) { + c := auxIntToInt32(v_2_0_0.AuxInt) + if !(c != 0) { break } - v.reset(OpAMD64CMOVQLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(z) - v.AddArg3(x, y, v0) + v.copyOf(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CMOVQNE x y (InvertFlags cond)) - // result: (CMOVQNE x y cond) + // match: (CMOVQEQ x _ (Select1 (BSRQ (ORQconst [c] _)))) + // cond: c != 0 + // result: x for { x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + if v_2.Op != OpSelect1 { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVQNE) - v.AddArg3(x, y, cond) - return true - } - // match: (CMOVQNE y _ (FlagEQ)) - // result: y - for { - y := v_0 - if v_2.Op != OpAMD64FlagEQ { - break - } - v.copyOf(y) - return true - } - // match: (CMOVQNE _ x (FlagGT_UGT)) - // result: x - for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { - break - } - v.copyOf(x) - return true - } - // match: (CMOVQNE _ x (FlagGT_ULT)) - // result: x - for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_ULT { + v_2_0 := v_2.Args[0] + if v_2_0.Op != OpAMD64BSRQ { break } - v.copyOf(x) - return true - } - // match: (CMOVQNE _ x (FlagLT_ULT)) - // result: x - for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_ULT { + v_2_0_0 := v_2_0.Args[0] + if v_2_0_0.Op != OpAMD64ORQconst { break } - v.copyOf(x) - return true - } - // match: (CMOVQNE _ x (FlagLT_UGT)) - // result: x - for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_UGT { + c := auxIntToInt32(v_2_0_0.AuxInt) + if !(c != 0) { break } v.copyOf(x) return true } - // match: (CMOVQNE x y (TESTQ s:(Select0 blsr:(BLSRQ _)) s)) - // result: (CMOVQNE x y (Select1 blsr)) + // match: (CMOVQEQ x y (TESTQ 
s:(Select0 blsr:(BLSRQ _)) s)) + // result: (CMOVQEQ x y (Select1 blsr)) for { x := v_0 y := v_1 @@ -11011,7 +13361,7 @@ func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool { if blsr.Op != OpAMD64BLSRQ || s != v_2_1 { continue } - v.reset(OpAMD64CMOVQNE) + v.reset(OpAMD64CMOVQEQ) v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) v0.AddArg(blsr) v.AddArg3(x, y, v0) @@ -11019,8 +13369,8 @@ func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool { } break } - // match: (CMOVQNE x y (TESTL s:(Select0 blsr:(BLSRL _)) s)) - // result: (CMOVQNE x y (Select1 blsr)) + // match: (CMOVQEQ x y (TESTL s:(Select0 blsr:(BLSRL _)) s)) + // result: (CMOVQEQ x y (Select1 blsr)) for { x := v_0 y := v_1 @@ -11039,7 +13389,7 @@ func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool { if blsr.Op != OpAMD64BLSRL || s != v_2_1 { continue } - v.reset(OpAMD64CMOVQNE) + v.reset(OpAMD64CMOVQEQ) v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) v0.AddArg(blsr) v.AddArg3(x, y, v0) @@ -11047,323 +13397,1288 @@ func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool { } break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVWCC(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVWCC x y (InvertFlags cond)) - // result: (CMOVWLS x y cond) + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQNE yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVWLS) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(yes, no, flags) + return 
true + } + break } - // match: (CMOVWCC _ x (FlagEQ)) - // result: x + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQEQ yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCC _ x (FlagGT_UGT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGE yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCC y _ (FlagGT_ULT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + 
if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCC y _ (FlagLT_ULT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCC _ x (FlagLT_UGT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLT yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVWCS(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVWCS x y (InvertFlags cond)) - // result: (CMOVWHI x y cond) + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLS yes no 
flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVWHI) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCS y _ (FlagEQ)) - // result: y + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCC yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCS y _ (FlagGT_UGT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCS _ x (FlagGT_ULT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCS _ x (FlagLT_ULT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQNE yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWCS y _ (FlagLT_UGT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQEQ yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x 
:= t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVWEQ(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVWEQ x y (InvertFlags cond)) - // result: (CMOVWEQ x y cond) + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGE yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVWEQ) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWEQ _ x (FlagEQ)) - // result: x + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWEQ y _ (FlagGT_UGT)) - // result: y + 
// match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWEQ y _ (FlagGT_ULT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWEQ y _ (FlagLT_ULT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLS yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if 
x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWEQ y _ (FlagLT_UGT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCC yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVWGE(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVWGE x y (InvertFlags cond)) - // result: (CMOVWLE x y cond) + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVWLE) - v.AddArg3(x, y, cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWGE _ x (FlagEQ)) - // result: x + // match: (CMOVQEQ yes no t:(TESTL x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) 
for { - x := v_1 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWGE _ x (FlagGT_UGT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQNE yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWGE _ x (FlagGT_ULT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQEQ yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - 
// match: (CMOVWGE y _ (FlagLT_ULT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_ULT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWGE y _ (FlagLT_UGT)) - // result: y + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagLT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMOVWGT(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMOVWGT x y (InvertFlags cond)) - // result: (CMOVWLT x y cond) + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGT yes no flags) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64InvertFlags { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - cond := v_2.Args[0] - v.reset(OpAMD64CMOVWLT) - v.AddArg3(x, y, 
cond) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWGT y _ (FlagEQ)) - // result: y + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLT yes no flags) for { - y := v_0 - if v_2.Op != OpAMD64FlagEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMOVWGT _ x (FlagGT_UGT)) - // result: x + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLS yes no flags) for { - x := v_1 - if v_2.Op != OpAMD64FlagGT_UGT { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.copyOf(x) - return true - } - // match: (CMOVWGT _ x (FlagGT_ULT)) - // result: x + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: 
t.Block == s.Block + // result: (CMOVQCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQNE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := 
t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQGE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQGT 
yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQLT yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQLS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQCC yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, 
flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQEQ yes no t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVQGE(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CMOVQGE x y (InvertFlags cond)) + // result: (CMOVQLE x y cond) + for { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { + break + } + cond := v_2.Args[0] + v.reset(OpAMD64CMOVQLE) + v.AddArg3(x, y, cond) + return true + } + // match: (CMOVQGE _ x (FlagEQ)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagEQ { + break + } + v.copyOf(x) + return true + } + // match: (CMOVQGE _ x (FlagGT_UGT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagGT_UGT { + break + } + v.copyOf(x) + return true + } + // match: (CMOVQGE _ x (FlagGT_ULT)) + // result: x for { x := v_1 if v_2.Op != OpAMD64FlagGT_ULT { @@ -11372,7 +14687,7 @@ func rewriteValueAMD64_OpAMD64CMOVWGT(v *Value) bool { v.copyOf(x) return 
true } - // match: (CMOVWGT y _ (FlagLT_ULT)) + // match: (CMOVQGE y _ (FlagLT_ULT)) // result: y for { y := v_0 @@ -11382,7 +14697,7 @@ func rewriteValueAMD64_OpAMD64CMOVWGT(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWGT y _ (FlagLT_UGT)) + // match: (CMOVQGE y _ (FlagLT_UGT)) // result: y for { y := v_0 @@ -11392,14 +14707,56 @@ func rewriteValueAMD64_OpAMD64CMOVWGT(v *Value) bool { v.copyOf(y) return true } + // match: (CMOVQGE x y c:(CMPQconst [128] z)) + // cond: c.Uses == 1 + // result: (CMOVQGT x y (CMPQconst [127] z)) + for { + x := v_0 + y := v_1 + c := v_2 + if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { + break + } + z := c.Args[0] + if !(c.Uses == 1) { + break + } + v.reset(OpAMD64CMOVQGT) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(z) + v.AddArg3(x, y, v0) + return true + } + // match: (CMOVQGE x y c:(CMPLconst [128] z)) + // cond: c.Uses == 1 + // result: (CMOVQGT x y (CMPLconst [127] z)) + for { + x := v_0 + y := v_1 + c := v_2 + if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { + break + } + z := c.Args[0] + if !(c.Uses == 1) { + break + } + v.reset(OpAMD64CMOVQGT) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(z) + v.AddArg3(x, y, v0) + return true + } return false } -func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQGT(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVWHI x y (InvertFlags cond)) - // result: (CMOVWCS x y cond) + // match: (CMOVQGT x y (InvertFlags cond)) + // result: (CMOVQLT x y cond) for { x := v_0 y := v_1 @@ -11407,11 +14764,11 @@ func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVWCS) + v.reset(OpAMD64CMOVQLT) v.AddArg3(x, y, cond) return true } - // match: (CMOVWHI y _ (FlagEQ)) + // match: (CMOVQGT y _ (FlagEQ)) // result: y for 
{ y := v_0 @@ -11421,7 +14778,7 @@ func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWHI _ x (FlagGT_UGT)) + // match: (CMOVQGT _ x (FlagGT_UGT)) // result: x for { x := v_1 @@ -11431,7 +14788,76 @@ func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWHI y _ (FlagGT_ULT)) + // match: (CMOVQGT _ x (FlagGT_ULT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagGT_ULT { + break + } + v.copyOf(x) + return true + } + // match: (CMOVQGT y _ (FlagLT_ULT)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagLT_ULT { + break + } + v.copyOf(y) + return true + } + // match: (CMOVQGT y _ (FlagLT_UGT)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagLT_UGT { + break + } + v.copyOf(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64CMOVQHI(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMOVQHI x y (InvertFlags cond)) + // result: (CMOVQCS x y cond) + for { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { + break + } + cond := v_2.Args[0] + v.reset(OpAMD64CMOVQCS) + v.AddArg3(x, y, cond) + return true + } + // match: (CMOVQHI y _ (FlagEQ)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagEQ { + break + } + v.copyOf(y) + return true + } + // match: (CMOVQHI _ x (FlagGT_UGT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagGT_UGT { + break + } + v.copyOf(x) + return true + } + // match: (CMOVQHI y _ (FlagGT_ULT)) // result: y for { y := v_0 @@ -11441,7 +14867,7 @@ func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWHI y _ (FlagLT_ULT)) + // match: (CMOVQHI y _ (FlagLT_ULT)) // result: y for { y := v_0 @@ -11451,7 +14877,7 @@ func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWHI _ x (FlagLT_UGT)) + // match: (CMOVQHI _ x (FlagLT_UGT)) // result: x for { x := v_1 @@ -11463,12 
+14889,12 @@ func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQLE(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVWLE x y (InvertFlags cond)) - // result: (CMOVWGE x y cond) + // match: (CMOVQLE x y (InvertFlags cond)) + // result: (CMOVQGE x y cond) for { x := v_0 y := v_1 @@ -11476,11 +14902,11 @@ func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVWGE) + v.reset(OpAMD64CMOVQGE) v.AddArg3(x, y, cond) return true } - // match: (CMOVWLE _ x (FlagEQ)) + // match: (CMOVQLE _ x (FlagEQ)) // result: x for { x := v_1 @@ -11490,7 +14916,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWLE y _ (FlagGT_UGT)) + // match: (CMOVQLE y _ (FlagGT_UGT)) // result: y for { y := v_0 @@ -11500,7 +14926,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWLE y _ (FlagGT_ULT)) + // match: (CMOVQLE y _ (FlagGT_ULT)) // result: y for { y := v_0 @@ -11510,7 +14936,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWLE _ x (FlagLT_ULT)) + // match: (CMOVQLE _ x (FlagLT_ULT)) // result: x for { x := v_1 @@ -11520,7 +14946,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWLE _ x (FlagLT_UGT)) + // match: (CMOVQLE _ x (FlagLT_UGT)) // result: x for { x := v_1 @@ -11532,12 +14958,12 @@ func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQLS(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVWLS x y (InvertFlags cond)) - // result: (CMOVWCC x y cond) + // match: (CMOVQLS x y (InvertFlags cond)) + // result: (CMOVQCC x y cond) for { x := v_0 y := 
v_1 @@ -11545,11 +14971,11 @@ func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVWCC) + v.reset(OpAMD64CMOVQCC) v.AddArg3(x, y, cond) return true } - // match: (CMOVWLS _ x (FlagEQ)) + // match: (CMOVQLS _ x (FlagEQ)) // result: x for { x := v_1 @@ -11559,7 +14985,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWLS y _ (FlagGT_UGT)) + // match: (CMOVQLS y _ (FlagGT_UGT)) // result: y for { y := v_0 @@ -11569,7 +14995,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWLS _ x (FlagGT_ULT)) + // match: (CMOVQLS _ x (FlagGT_ULT)) // result: x for { x := v_1 @@ -11579,7 +15005,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWLS _ x (FlagLT_ULT)) + // match: (CMOVQLS _ x (FlagLT_ULT)) // result: x for { x := v_1 @@ -11589,7 +15015,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWLS y _ (FlagLT_UGT)) + // match: (CMOVQLS y _ (FlagLT_UGT)) // result: y for { y := v_0 @@ -11601,12 +15027,13 @@ func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQLT(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVWLT x y (InvertFlags cond)) - // result: (CMOVWGT x y cond) + b := v.Block + // match: (CMOVQLT x y (InvertFlags cond)) + // result: (CMOVQGT x y cond) for { x := v_0 y := v_1 @@ -11614,11 +15041,11 @@ func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVWGT) + v.reset(OpAMD64CMOVQGT) v.AddArg3(x, y, cond) return true } - // match: (CMOVWLT y _ (FlagEQ)) + // match: (CMOVQLT y _ (FlagEQ)) // result: y for { y := v_0 @@ -11628,7 +15055,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { v.copyOf(y) return true } - 
// match: (CMOVWLT y _ (FlagGT_UGT)) + // match: (CMOVQLT y _ (FlagGT_UGT)) // result: y for { y := v_0 @@ -11638,7 +15065,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWLT y _ (FlagGT_ULT)) + // match: (CMOVQLT y _ (FlagGT_ULT)) // result: y for { y := v_0 @@ -11648,7 +15075,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWLT _ x (FlagLT_ULT)) + // match: (CMOVQLT _ x (FlagLT_ULT)) // result: x for { x := v_1 @@ -11658,7 +15085,7 @@ func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWLT _ x (FlagLT_UGT)) + // match: (CMOVQLT _ x (FlagLT_UGT)) // result: x for { x := v_1 @@ -11668,14 +15095,57 @@ func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { v.copyOf(x) return true } + // match: (CMOVQLT x y c:(CMPQconst [128] z)) + // cond: c.Uses == 1 + // result: (CMOVQLE x y (CMPQconst [127] z)) + for { + x := v_0 + y := v_1 + c := v_2 + if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { + break + } + z := c.Args[0] + if !(c.Uses == 1) { + break + } + v.reset(OpAMD64CMOVQLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(z) + v.AddArg3(x, y, v0) + return true + } + // match: (CMOVQLT x y c:(CMPLconst [128] z)) + // cond: c.Uses == 1 + // result: (CMOVQLE x y (CMPLconst [127] z)) + for { + x := v_0 + y := v_1 + c := v_2 + if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { + break + } + z := c.Args[0] + if !(c.Uses == 1) { + break + } + v.reset(OpAMD64CMOVQLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(z) + v.AddArg3(x, y, v0) + return true + } return false } -func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVQNE(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CMOVWNE x y (InvertFlags cond)) - // 
result: (CMOVWNE x y cond) + b := v.Block + // match: (CMOVQNE x y (InvertFlags cond)) + // result: (CMOVQNE x y cond) for { x := v_0 y := v_1 @@ -11683,11 +15153,11 @@ func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { break } cond := v_2.Args[0] - v.reset(OpAMD64CMOVWNE) + v.reset(OpAMD64CMOVQNE) v.AddArg3(x, y, cond) return true } - // match: (CMOVWNE y _ (FlagEQ)) + // match: (CMOVQNE y _ (FlagEQ)) // result: y for { y := v_0 @@ -11697,7 +15167,7 @@ func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { v.copyOf(y) return true } - // match: (CMOVWNE _ x (FlagGT_UGT)) + // match: (CMOVQNE _ x (FlagGT_UGT)) // result: x for { x := v_1 @@ -11707,7 +15177,7 @@ func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWNE _ x (FlagGT_ULT)) + // match: (CMOVQNE _ x (FlagGT_ULT)) // result: x for { x := v_1 @@ -11717,7 +15187,7 @@ func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWNE _ x (FlagLT_ULT)) + // match: (CMOVQNE _ x (FlagLT_ULT)) // result: x for { x := v_1 @@ -11727,7 +15197,7 @@ func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { v.copyOf(x) return true } - // match: (CMOVWNE _ x (FlagLT_UGT)) + // match: (CMOVQNE _ x (FlagLT_UGT)) // result: x for { x := v_1 @@ -11737,4407 +15207,5809 @@ func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { v.copyOf(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64CMPB(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CMPB x (MOVLconst [c])) - // result: (CMPBconst x [int8(c)]) + // match: (CMOVQNE x y (TESTQ s:(Select0 blsr:(BLSRQ _)) s)) + // result: (CMOVQNE x y (Select1 blsr)) for { x := v_0 - if v_1.Op != OpAMD64MOVLconst { + y := v_1 + if v_2.Op != OpAMD64TESTQ { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64CMPBconst) - v.AuxInt = int8ToAuxInt(int8(c)) - v.AddArg(x) - return true - } - // match: (CMPB (MOVLconst [c]) x) - // result: (InvertFlags (CMPBconst x 
[int8(c)])) - for { - if v_0.Op != OpAMD64MOVLconst { - break + _ = v_2.Args[1] + v_2_0 := v_2.Args[0] + v_2_1 := v_2.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 { + s := v_2_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRQ || s != v_2_1 { + continue + } + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + v.AddArg3(x, y, v0) + return true } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(c)) - v0.AddArg(x) - v.AddArg(v0) - return true + break } - // match: (CMPB x y) - // cond: canonLessThan(x,y) - // result: (InvertFlags (CMPB y x)) + // match: (CMOVQNE x y (TESTL s:(Select0 blsr:(BLSRL _)) s)) + // result: (CMOVQNE x y (Select1 blsr)) for { x := v_0 y := v_1 - if !(canonLessThan(x, y)) { + if v_2.Op != OpAMD64TESTL { break } - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) - return true + _ = v_2.Args[1] + v_2_0 := v_2.Args[0] + v_2_1 := v_2.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_2_0, v_2_1 = _i0+1, v_2_1, v_2_0 { + s := v_2_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRL || s != v_2_1 { + continue + } + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + v.AddArg3(x, y, v0) + return true + } + break } - // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (CMPBload {sym} [off] ptr x mem) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQEQ yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVBload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := 
l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPBload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (CMPB x l:(MOVBload {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (InvertFlags (CMPBload {sym} [off] ptr x mem)) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQNE yes no flags) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVBload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(l.Pos, OpAMD64CMPBload, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg3(ptr, x, mem) - v.AddArg(v0) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPBconst(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CMPBconst (MOVLconst [x]) [y]) - // cond: int8(x)==y - // result: (FlagEQ) + // match: (CMOVQNE yes no t:(TESTQ 
x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLT yes no flags) for { - y := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := auxIntToInt32(v_0.AuxInt) - if !(int8(x) == y) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagEQ) - return true + break } - // match: (CMPBconst (MOVLconst [x]) [y]) - // cond: int8(x)uint8(y) - // result: (FlagLT_UGT) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) for { - y := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := auxIntToInt32(v_0.AuxInt) - if !(int8(x) < y && uint8(x) > uint8(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagLT_UGT) - return true + break } - // match: (CMPBconst (MOVLconst [x]) [y]) - // cond: int8(x)>y && uint8(x) y && uint8(x) < uint8(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || 
!(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_ULT) - return true + break } - // match: (CMPBconst (MOVLconst [x]) [y]) - // cond: int8(x)>y && uint8(x)>uint8(y) - // result: (FlagGT_UGT) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) for { - y := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := auxIntToInt32(v_0.AuxInt) - if !(int8(x) > y && uint8(x) > uint8(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_UGT) - return true + break } - // match: (CMPBconst (ANDLconst _ [m]) [n]) - // cond: 0 <= int8(m) && int8(m) < n - // result: (FlagLT_ULT) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) for { - n := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64ANDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - m := auxIntToInt32(v_0.AuxInt) - if !(0 <= int8(m) && int8(m) < n) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagLT_ULT) - return true + break } - // match: (CMPBconst a:(ANDL x y) 
[0]) - // cond: a.Uses == 1 - // result: (TESTB x y) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCC yes no flags) for { - if auxIntToInt8(v.AuxInt) != 0 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - a := v_0 - if a.Op != OpAMD64ANDL { - break - } - y := a.Args[1] - x := a.Args[0] - if !(a.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64TESTB) - v.AddArg2(x, y) - return true + break } - // match: (CMPBconst a:(ANDLconst [c] x) [0]) - // cond: a.Uses == 1 - // result: (TESTBconst [int8(c)] x) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLS yes no flags) for { - if auxIntToInt8(v.AuxInt) != 0 { - break - } - a := v_0 - if a.Op != OpAMD64ANDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - c := auxIntToInt32(a.AuxInt) - x := a.Args[0] - if !(a.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64TESTBconst) - v.AuxInt = int8ToAuxInt(int8(c)) - v.AddArg(x) - return true + break } - // match: (CMPBconst x [0]) - // result: (TESTB x x) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: 
(CMOVQEQF yes no flags) for { - if auxIntToInt8(v.AuxInt) != 0 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := v_0 - v.reset(OpAMD64TESTB) - v.AddArg2(x, x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c]) - // cond: l.Uses == 1 && clobber(l) - // result: @l.Block (CMPBconstload {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQNEF yes no flags) for { - c := auxIntToInt8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64MOVBload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(l.Uses == 1 && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNEF) + v.AddArg3(yes, no, flags) + return true } - b = l.Block - v0 := b.NewValue0(l.Pos, OpAMD64CMPBconstload, types.TypeFlags) - v.copyOf(v0) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPBconstload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMPBconstload [valoff1] {sym} (ADDQconst [off2] base) mem) 
- // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (CMPBconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGTF yes no flags) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGTF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPBconstload) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(sym) - v.AddArg2(base, mem) - return true + break } - // match: (CMPBconstload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) - // result: (CMPBconstload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + // match: (CMOVQNE yes no t:(TESTQ x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGEF yes no flags) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != 
OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPBconstload) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPBload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMPBload [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (CMPBload [off1+off2] {sym} base val mem) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQEQ yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPBload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) - return true + break } - // match: (CMPBload [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (CMPBload [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == 
s.Block + // result: (CMOVQNE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPBload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) - return true + break } - // match: (CMPBload {sym} [off] ptr (MOVLconst [c]) mem) - // result: (CMPBconstload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLT yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt32(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64CMPBconstload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(yes, no, flags) + return true + } + break } - return 
false -} -func rewriteValueAMD64_OpAMD64CMPL(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CMPL x (MOVLconst [c])) - // result: (CMPLconst x [c]) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGT yes no flags) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64CMPLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPL (MOVLconst [c]) x) - // result: (InvertFlags (CMPLconst x [c])) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) for { - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(c) - v0.AddArg(x) - v.AddArg(v0) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPL x y) - // cond: canonLessThan(x,y) - // result: (InvertFlags (CMPL y x)) + // match: (CMOVQNE yes no 
t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGE yes no flags) for { - x := v_0 - y := v_1 - if !(canonLessThan(x, y)) { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (CMPLload {sym} [off] ptr x mem) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVLload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (CMPL x l:(MOVLload {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (InvertFlags (CMPLload {sym} [off] ptr x mem)) + // match: 
(CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVLload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(l.Pos, OpAMD64CMPLload, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg3(ptr, x, mem) - v.AddArg(v0) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPLconst(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CMPLconst (MOVLconst [x]) [y]) - // cond: x==y - // result: (FlagEQ) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQCC yes no flags) for { - y := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - x := auxIntToInt32(v_0.AuxInt) - if !(x == y) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagEQ) - return true + break } - // match: (CMPLconst (MOVLconst [x]) [y]) - // cond: 
xuint32(y) - // result: (FlagLT_UGT) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQEQF yes no flags) for { - y := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - x := auxIntToInt32(v_0.AuxInt) - if !(x < y && uint32(x) > uint32(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQEQF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagLT_UGT) - return true + break } - // match: (CMPLconst (MOVLconst [x]) [y]) - // cond: x>y && uint32(x) y && uint32(x) < uint32(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_ULT) - return true + break } - // match: (CMPLconst (MOVLconst [x]) [y]) - // cond: x>y && uint32(x)>uint32(y) - // result: (FlagGT_UGT) + // match: (CMOVQNE yes no t:(TESTL x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVQGTF yes no flags) for { - y := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - x := auxIntToInt32(v_0.AuxInt) - if !(x > y && uint32(x) > uint32(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + 
continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGTF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_UGT) - return true + break } - // match: (CMPLconst (SHRLconst _ [c]) [n]) - // cond: 0 <= n && 0 < c && c <= 32 && (1<uint64(y) - // result: (FlagLT_UGT) + // match: (CMOVQNE yes no t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQGT yes no flags) for { - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := auxIntToInt64(v_0.AuxInt) - if v_1.Op != OpAMD64MOVQconst { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVQNE yes no t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQLE yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - y := auxIntToInt64(v_1.AuxInt) - if !(x < y && uint64(x) > uint64(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagLT_UGT) - return true + break } - // match: (CMPQ (MOVQconst [x]) (MOVQconst [y])) - // cond: x>y && uint64(x) y && uint64(x) < uint64(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != 
t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_ULT) - return true + break } - // match: (CMPQ (MOVQconst [x]) (MOVQconst [y])) - // cond: x>y && uint64(x)>uint64(y) - // result: (FlagGT_UGT) + // match: (CMOVQNE yes no t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQHI yes no flags) for { - if v_0.Op != OpAMD64MOVQconst { - break - } - x := auxIntToInt64(v_0.AuxInt) - if v_1.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - y := auxIntToInt64(v_1.AuxInt) - if !(x > y && uint64(x) > uint64(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_UGT) - return true + break } - // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (CMPQload {sym} [off] ptr x mem) + // match: (CMOVQNE yes no t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQCS yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVQload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg3(ptr, x, mem) - return true + break } - // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (InvertFlags (CMPQload {sym} [off] ptr x mem)) + // match: (CMOVQNE yes no t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQCC yes no flags) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVQload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64InvertFlags) - v0 := b.NewValue0(l.Pos, OpAMD64CMPQload, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg3(ptr, x, mem) - v.AddArg(v0) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPQconst(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (CMPQconst (MOVQconst [x]) [y]) - // cond: x==int64(y) - // result: (FlagEQ) + // match: (CMOVQNE yes no t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQLS yes no flags) for { - y := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := auxIntToInt64(v_0.AuxInt) - if !(x == int64(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(yes, no, flags) + return true } - 
v.reset(OpAMD64FlagEQ) - return true + break } - // match: (CMPQconst (MOVQconst [x]) [y]) - // cond: xuint64(int64(y)) - // result: (FlagLT_UGT) + // match: (CMOVQNE yes no t:(TESTB s:(SETNEF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQNEF yes no flags) for { - y := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := auxIntToInt64(v_0.AuxInt) - if !(x < int64(y) && uint64(x) > uint64(int64(y))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQNEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagLT_UGT) - return true + break } - // match: (CMPQconst (MOVQconst [x]) [y]) - // cond: x>int64(y) && uint64(x) int64(y) && uint64(x) < uint64(int64(y))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGTF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_ULT) - return true + break } - // match: (CMPQconst (MOVQconst [x]) [y]) - // cond: x>int64(y) && uint64(x)>uint64(int64(y)) - // result: (FlagGT_UGT) + // match: (CMOVQNE yes no t:(TESTB s:(SETGEF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVQGEF yes no flags) for { - y := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := auxIntToInt64(v_0.AuxInt) - if !(x > int64(y) && uint64(x) > uint64(int64(y))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if 
s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVQGEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_UGT) - return true + break } - // match: (CMPQconst (MOVBQZX _) [c]) - // cond: 0xFF < c - // result: (FlagLT_ULT) + return false +} +func rewriteValueAMD64_OpAMD64CMOVWCC(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMOVWCC x y (InvertFlags cond)) + // result: (CMOVWLS x y cond) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVBQZX || !(0xFF < c) { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64FlagLT_ULT) + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWLS) + v.AddArg3(x, y, cond) return true } - // match: (CMPQconst (MOVWQZX _) [c]) - // cond: 0xFFFF < c - // result: (FlagLT_ULT) + // match: (CMOVWCC _ x (FlagEQ)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVWQZX || !(0xFFFF < c) { + x := v_1 + if v_2.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64FlagLT_ULT) + v.copyOf(x) return true } - // match: (CMPQconst (SHRQconst _ [c]) [n]) - // cond: 0 <= n && 0 < c && c <= 64 && (1<uint16(y) - // result: (FlagLT_UGT) + // match: (CMOVWEQ yes no t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - y := auxIntToInt16(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := auxIntToInt32(v_0.AuxInt) - if !(int16(x) < y && uint16(x) > uint16(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true } 
- v.reset(OpAMD64FlagLT_UGT) - return true + break } - // match: (CMPWconst (MOVLconst [x]) [y]) - // cond: int16(x)>y && uint16(x) y && uint16(x) < uint16(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_ULT) - return true + break } - // match: (CMPWconst (MOVLconst [x]) [y]) - // cond: int16(x)>y && uint16(x)>uint16(y) - // result: (FlagGT_UGT) + // match: (CMOVWEQ yes no t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - y := auxIntToInt16(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := auxIntToInt32(v_0.AuxInt) - if !(int16(x) > y && uint16(x) > uint16(y)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagGT_UGT) - return true + break } - // match: (CMPWconst (ANDLconst _ [m]) [n]) - // cond: 0 <= int16(m) && int16(m) < n - // result: (FlagLT_ULT) + // match: (CMOVWEQ yes no t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - n := auxIntToInt16(v.AuxInt) - if v_0.Op != OpAMD64ANDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - m := auxIntToInt32(v_0.AuxInt) - if !(0 <= int16(m) && int16(m) < n) { - break + _ = t.Args[1] + 
t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64FlagLT_ULT) - return true + break } - // match: (CMPWconst a:(ANDL x y) [0]) - // cond: a.Uses == 1 - // result: (TESTW x y) + // match: (CMOVWEQ yes no t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - if auxIntToInt16(v.AuxInt) != 0 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - a := v_0 - if a.Op != OpAMD64ANDL { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true } - y := a.Args[1] - x := a.Args[0] - if !(a.Uses == 1) { + break + } + // match: (CMOVWEQ yes no t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.reset(OpAMD64TESTW) - v.AddArg2(x, y) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPWconst a:(ANDLconst [c] x) [0]) - // cond: a.Uses == 1 - // result: (TESTWconst 
[int16(c)] x) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - if auxIntToInt16(v.AuxInt) != 0 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - a := v_0 - if a.Op != OpAMD64ANDLconst { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + return true } - c := auxIntToInt32(a.AuxInt) - x := a.Args[0] - if !(a.Uses == 1) { + break + } + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - v.reset(OpAMD64TESTWconst) - v.AuxInt = int16ToAuxInt(int16(c)) - v.AddArg(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPWconst x [0]) - // result: (TESTW x x) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - if auxIntToInt16(v.AuxInt) != 0 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - x := v_0 - v.reset(OpAMD64TESTW) - v.AddArg2(x, x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + 
continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c]) - // cond: l.Uses == 1 && clobber(l) - // result: @l.Block (CMPWconstload {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - c := auxIntToInt16(v.AuxInt) - l := v_0 - if l.Op != OpAMD64MOVWload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(l.Uses == 1 && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, flags) + return true } - b = l.Block - v0 := b.NewValue0(l.Pos, OpAMD64CMPWconstload, types.TypeFlags) - v.copyOf(v0) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPWconstload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMPWconstload [valoff1] {sym} (ADDQconst [off2] base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (CMPWconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { 
+ yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPWconstload) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(sym) - v.AddArg2(base, mem) - return true + break } - // match: (CMPWconstload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) - // result: (CMPWconstload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPWconstload) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(mergeSym(sym1, sym2)) - 
v.AddArg2(base, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPWload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMPWload [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (CMPWload [off1+off2] {sym} base val mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPWload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) - return true + break } - // match: (CMPWload [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (CMPWload [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) 
{ - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPWload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) - return true + break } - // match: (CMPWload {sym} [off] ptr (MOVLconst [c]) mem) - // result: (CMPWconstload {sym} [makeValAndOff(int32(int16(c)),off)] ptr mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt32(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64CMPWconstload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPXCHGLlock(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem) + // match: (CMOVWEQ yes no t:(TESTL x:(MOVBQZX s:(SETBE 
flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - old := v_1 - new_ := v_2 - mem := v_3 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPXCHGLlock) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg4(ptr, old, new_, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CMPXCHGQlock(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - old := v_1 - new_ := v_2 - mem := v_3 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block 
== s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64CMPXCHGQlock) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg4(ptr, old, new_, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64CVTSD2SS(v *Value) bool { - v_0 := v.Args[0] - // match: (CVTSD2SS (ROUNDSD [c] (CVTSS2SD x))) - // result: (ROUNDSS [c] x) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) for { - if v_0.Op != OpAMD64ROUNDSD { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - c := auxIntToInt8(v_0.AuxInt) - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64CVTSS2SD { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true } - x := v_0_0.Args[0] - v.reset(OpAMD64ROUNDSS) - v.AuxInt = int8ToAuxInt(c) - v.AddArg(x) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64DIVSD(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (DIVSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (DIVSDload x [off] {sym} ptr mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVSDload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := 
t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64DIVSDload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64DIVSDload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (DIVSDload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (DIVSDload [off1+off2] {sym} val base mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64DIVSDload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) - return true + break } - // match: (DIVSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (DIVSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX 
s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64DIVSDload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64DIVSS(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (DIVSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (DIVSSload x [off] {sym} ptr mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVSSload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64DIVSSload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64DIVSSload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (DIVSSload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (DIVSSload [off1+off2] {sym} val base mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64DIVSSload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) - return true + break } - // match: (DIVSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (DIVSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - 
off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64DIVSSload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64HMULL(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (HMULL x y) - // cond: !x.rematerializeable() && y.rematerializeable() - // result: (HMULL y x) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - x := v_0 - y := v_1 - if !(!x.rematerializeable() && y.rematerializeable()) { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.reset(OpAMD64HMULL) - v.AddArg2(y, x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64HMULLU(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (HMULLU x y) - // cond: !x.rematerializeable() && y.rematerializeable() - // result: (HMULLU y x) - for { - x := v_0 - y := v_1 - if !(!x.rematerializeable() && y.rematerializeable()) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true } - 
v.reset(OpAMD64HMULLU) - v.AddArg2(y, x) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64HMULQ(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (HMULQ x y) - // cond: !x.rematerializeable() && y.rematerializeable() - // result: (HMULQ y x) + // match: (CMOVWEQ yes no t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - x := v_0 - y := v_1 - if !(!x.rematerializeable() && y.rematerializeable()) { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.reset(OpAMD64HMULQ) - v.AddArg2(y, x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64HMULQU(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (HMULQU x y) - // cond: !x.rematerializeable() && y.rematerializeable() - // result: (HMULQU y x) - for { - x := v_0 - y := v_1 - if !(!x.rematerializeable() && y.rematerializeable()) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64HMULQU) - v.AddArg2(y, x) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64KMOVBk(v *Value) bool { - v_0 := v.Args[0] - // match: (KMOVBk l:(MOVBload [off] {sym} ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (KMOVBload [off] {sym} ptr mem) + // match: (CMOVWEQ yes no t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVBload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && 
clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64KMOVBload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64KMOVDk(v *Value) bool { - v_0 := v.Args[0] - // match: (KMOVDk l:(MOVLload [off] {sym} ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (KMOVDload [off] {sym} ptr mem) + // match: (CMOVWEQ yes no t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVLload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64KMOVDload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64KMOVQk(v *Value) bool { - v_0 := v.Args[0] - // match: (KMOVQk l:(MOVQload [off] {sym} ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (KMOVQload [off] {sym} ptr mem) + // match: (CMOVWEQ yes no t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVQload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != 
OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64KMOVQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64KMOVWk(v *Value) bool { - v_0 := v.Args[0] - // match: (KMOVWk l:(MOVWload [off] {sym} ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (KMOVWload [off] {sym} ptr mem) + // match: (CMOVWEQ yes no t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - l := v_0 - if l.Op != OpAMD64MOVWload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64KMOVWload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64LEAL(v *Value) bool { - v_0 := v.Args[0] - // match: (LEAL [c] {s} (ADDLconst [d] x)) - // cond: is32Bit(int64(c)+int64(d)) - // result: (LEAL [c+d] {s} x) + // match: (CMOVWEQ yes no t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes 
no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(is32Bit(int64(c) + int64(d))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAL) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg(x) - return true + break } - // match: (LEAL [c] {s} (ADDL x y)) - // cond: x.Op != OpSB && y.Op != OpSB - // result: (LEAL1 [c] {s} x y) + // match: (CMOVWEQ yes no t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDL { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - x := v_0_0 - y := v_0_1 - if !(x.Op != OpSB && y.Op != OpSB) { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { continue } - v.reset(OpAMD64LEAL1) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64LEAL1(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LEAL1 [c] {s} (ADDLconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAL1 [c+d] {s} x y) + // match: (CMOVWEQ yes no t:(TESTB 
s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { continue } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v.reset(OpAMD64LEAL1) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) return true } break } - // match: (LEAL1 [c] {s} x z:(ADDL y y)) - // cond: x != z - // result: (LEAL2 [c] {s} x y) + // match: (CMOVWEQ yes no t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - z := v_1 - if z.Op != OpAMD64ADDL { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { continue } - y := z.Args[1] - if y != z.Args[0] || !(x != z) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v.reset(OpAMD64LEAL2) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) return true } break } - // match: (LEAL1 [c] {s} x (SHLLconst [2] y)) - // result: (LEAL4 [c] {s} x y) + // match: (CMOVWEQ yes no t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - c := 
auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { continue } - y := v_1.Args[0] - v.reset(OpAMD64LEAL4) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) return true } break } - // match: (LEAL1 [c] {s} x (SHLLconst [3] y)) - // result: (LEAL8 [c] {s} x y) + // match: (CMOVWEQ yes no t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 3 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { continue } - y := v_1.Args[0] - v.reset(OpAMD64LEAL8) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) return true } break } return false } -func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVWGE(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LEAL2 [c] {s} (ADDLconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAL2 [c+d] {s} x y) + // match: (CMOVWGE x y (InvertFlags cond)) + // result: (CMOVWLE x y cond) for { - 
c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDLconst { - break - } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] + x := v_0 y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + if v_2.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64LEAL2) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWLE) + v.AddArg3(x, y, cond) return true } - // match: (LEAL2 [c] {s} x (ADDLconst [d] y)) - // cond: is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB - // result: (LEAL2 [c+2*d] {s} x y) + // match: (CMOVWGE _ x (FlagEQ)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64ADDLconst { + x := v_1 + if v_2.Op != OpAMD64FlagEQ { break } - d := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB) { + v.copyOf(x) + return true + } + // match: (CMOVWGE _ x (FlagGT_UGT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64LEAL2) - v.AuxInt = int32ToAuxInt(c + 2*d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAL2 [c] {s} x z:(ADDL y y)) - // cond: x != z - // result: (LEAL4 [c] {s} x y) + // match: (CMOVWGE _ x (FlagGT_ULT)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - z := v_1 - if z.Op != OpAMD64ADDL { - break - } - y := z.Args[1] - if y != z.Args[0] || !(x != z) { + x := v_1 + if v_2.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64LEAL4) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAL2 [c] {s} x (SHLLconst [2] y)) - // result: (LEAL8 [c] {s} x y) + // match: (CMOVWGE y _ (FlagLT_ULT)) + // result: y for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 { + y := v_0 + if v_2.Op != OpAMD64FlagLT_ULT { 
break } - y := v_1.Args[0] - v.reset(OpAMD64LEAL8) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(y) return true } - // match: (LEAL2 [0] {s} (ADDL x x) x) - // cond: s == nil - // result: (SHLLconst [2] x) + // match: (CMOVWGE y _ (FlagLT_UGT)) + // result: y for { - if auxIntToInt32(v.AuxInt) != 0 { - break - } - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDL { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] || x != v_1 || !(s == nil) { + y := v_0 + if v_2.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + v.copyOf(y) return true } return false } -func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVWGT(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LEAL4 [c] {s} (ADDLconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAL4 [c+d] {s} x y) + // match: (CMOVWGT x y (InvertFlags cond)) + // result: (CMOVWLT x y cond) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDLconst { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWLT) + v.AddArg3(x, y, cond) + return true + } + // match: (CMOVWGT y _ (FlagEQ)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64LEAL4) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(y) return true } - // match: (LEAL4 [c] {s} x (ADDLconst [d] y)) - // cond: is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB - // result: (LEAL4 [c+4*d] {s} x y) + // match: (CMOVWGT _ x (FlagGT_UGT)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64ADDLconst { + x := v_1 + if v_2.Op != OpAMD64FlagGT_UGT { break } - d := 
auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB) { + v.copyOf(x) + return true + } + // match: (CMOVWGT _ x (FlagGT_ULT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64LEAL4) - v.AuxInt = int32ToAuxInt(c + 4*d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAL4 [c] {s} x z:(ADDL y y)) - // cond: x != z - // result: (LEAL8 [c] {s} x y) + // match: (CMOVWGT y _ (FlagLT_ULT)) + // result: y for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - z := v_1 - if z.Op != OpAMD64ADDL { + y := v_0 + if v_2.Op != OpAMD64FlagLT_ULT { break } - y := z.Args[1] - if y != z.Args[0] || !(x != z) { + v.copyOf(y) + return true + } + // match: (CMOVWGT y _ (FlagLT_UGT)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64LEAL8) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(y) return true } return false } -func rewriteValueAMD64_OpAMD64LEAL8(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVWHI(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LEAL8 [c] {s} (ADDLconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAL8 [c+d] {s} x y) + // match: (CMOVWHI x y (InvertFlags cond)) + // result: (CMOVWCS x y cond) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDLconst { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWCS) + v.AddArg3(x, y, cond) + return true + } + // match: (CMOVWHI y _ (FlagEQ)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64LEAL8) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(y) return true 
} - // match: (LEAL8 [c] {s} x (ADDLconst [d] y)) - // cond: is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB - // result: (LEAL8 [c+8*d] {s} x y) + // match: (CMOVWHI _ x (FlagGT_UGT)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64ADDLconst { + x := v_1 + if v_2.Op != OpAMD64FlagGT_UGT { break } - d := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB) { + v.copyOf(x) + return true + } + // match: (CMOVWHI y _ (FlagGT_ULT)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64LEAL8) - v.AuxInt = int32ToAuxInt(c + 8*d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(y) return true } - return false -} -func rewriteValueAMD64_OpAMD64LEAQ(v *Value) bool { - v_0 := v.Args[0] - // match: (LEAQ [c] {s} (ADDQconst [d] x)) - // cond: is32Bit(int64(c)+int64(d)) - // result: (LEAQ [c+d] {s} x) + // match: (CMOVWHI y _ (FlagLT_ULT)) + // result: y for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + y := v_0 + if v_2.Op != OpAMD64FlagLT_ULT { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(is32Bit(int64(c) + int64(d))) { + v.copyOf(y) + return true + } + // match: (CMOVWHI _ x (FlagLT_UGT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg(x) + v.copyOf(x) return true } - // match: (LEAQ [c] {s} (ADDQ x y)) - // cond: x.Op != OpSB && y.Op != OpSB - // result: (LEAQ1 [c] {s} x y) + return false +} +func rewriteValueAMD64_OpAMD64CMOVWLE(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMOVWLE x y (InvertFlags cond)) + // result: (CMOVWGE x y cond) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQ { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { break } - _ = v_0.Args[1] - 
v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - x := v_0_0 - y := v_0_1 - if !(x.Op != OpSB && y.Op != OpSB) { - continue - } - v.reset(OpAMD64LEAQ1) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWGE) + v.AddArg3(x, y, cond) + return true + } + // match: (CMOVWLE _ x (FlagEQ)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagEQ { + break } - break + v.copyOf(x) + return true } - // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x) + // match: (CMOVWLE y _ (FlagGT_UGT)) + // result: y for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + y := v_0 + if v_2.Op != OpAMD64FlagGT_UGT { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - x := v_0.Args[0] - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + v.copyOf(y) + return true + } + // match: (CMOVWLE y _ (FlagGT_ULT)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg(x) + v.copyOf(y) return true } - // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) + // match: (CMOVWLE _ x (FlagLT_ULT)) + // result: x for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ1 { + x := v_1 + if v_2.Op != OpAMD64FlagLT_ULT { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - y := v_0.Args[1] - x := v_0.Args[0] - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + v.copyOf(x) + return true + } + // match: (CMOVWLE _ x 
(FlagLT_UGT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64LEAQ1) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) + return false +} +func rewriteValueAMD64_OpAMD64CMOVWLS(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMOVWLS x y (InvertFlags cond)) + // result: (CMOVWCC x y cond) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ2 { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - y := v_0.Args[1] - x := v_0.Args[0] - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWCC) + v.AddArg3(x, y, cond) + return true + } + // match: (CMOVWLS _ x (FlagEQ)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64LEAQ2) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) + // match: (CMOVWLS y _ (FlagGT_UGT)) + // result: y for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ4 { + y := v_0 + if v_2.Op != OpAMD64FlagGT_UGT { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - y := v_0.Args[1] - x := v_0.Args[0] - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + v.copyOf(y) + return true + } + // match: (CMOVWLS _ x (FlagGT_ULT)) + // result: x + for { + x := v_1 + if 
v_2.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) + // match: (CMOVWLS _ x (FlagLT_ULT)) + // result: x for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ8 { + x := v_1 + if v_2.Op != OpAMD64FlagLT_ULT { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - y := v_0.Args[1] - x := v_0.Args[0] - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + v.copyOf(x) + return true + } + // match: (CMOVWLS y _ (FlagLT_UGT)) + // result: y + for { + y := v_0 + if v_2.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) + v.copyOf(y) return true } return false } -func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVWLT(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAQ1 [c+d] {s} x y) + // match: (CMOVWLT x y (InvertFlags cond)) + // result: (CMOVWGT x y cond) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { - continue - } - v.reset(OpAMD64LEAQ1) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + x := v_0 + y := v_1 + if v_2.Op != OpAMD64InvertFlags { + break } - break + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWGT) + v.AddArg3(x, y, 
cond) + return true } - // match: (LEAQ1 [c] {s} x z:(ADDQ y y)) - // cond: x != z - // result: (LEAQ2 [c] {s} x y) + // match: (CMOVWLT y _ (FlagEQ)) + // result: y for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - z := v_1 - if z.Op != OpAMD64ADDQ { - continue - } - y := z.Args[1] - if y != z.Args[0] || !(x != z) { - continue - } - v.reset(OpAMD64LEAQ2) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + y := v_0 + if v_2.Op != OpAMD64FlagEQ { + break } - break + v.copyOf(y) + return true } - // match: (LEAQ1 [c] {s} x (SHLQconst [2] y)) - // result: (LEAQ4 [c] {s} x y) + // match: (CMOVWLT y _ (FlagGT_UGT)) + // result: y for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 { - continue - } - y := v_1.Args[0] - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + y := v_0 + if v_2.Op != OpAMD64FlagGT_UGT { + break } - break + v.copyOf(y) + return true } - // match: (LEAQ1 [c] {s} x (SHLQconst [3] y)) - // result: (LEAQ8 [c] {s} x y) - for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 3 { - continue - } - y := v_1.Args[0] - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true - } - break - } - // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB - // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) - for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64LEAQ { - continue - } 
- off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { - continue - } - v.reset(OpAMD64LEAQ1) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) - return true - } - break - } - // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y) + // match: (CMOVWLT y _ (FlagGT_ULT)) + // result: y for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64LEAQ1 { - continue - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - y := v_1.Args[1] - if y != v_1.Args[0] || !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - continue - } - v.reset(OpAMD64LEAQ2) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) - return true + y := v_0 + if v_2.Op != OpAMD64FlagGT_ULT { + break } - break + v.copyOf(y) + return true } - // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x) + // match: (CMOVWLT _ x (FlagLT_ULT)) + // result: x for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64LEAQ1 { - continue - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { - if x != v_1_0 { - continue - } - y := v_1_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - continue - } - v.reset(OpAMD64LEAQ2) - v.AuxInt = 
int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(y, x) - return true - } + x := v_1 + if v_2.Op != OpAMD64FlagLT_ULT { + break } - break + v.copyOf(x) + return true } - // match: (LEAQ1 [0] x y) - // cond: v.Aux == nil - // result: (ADDQ x y) + // match: (CMOVWLT _ x (FlagLT_UGT)) + // result: x for { - if auxIntToInt32(v.AuxInt) != 0 { - break - } - x := v_0 - y := v_1 - if !(v.Aux == nil) { + x := v_1 + if v_2.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64ADDQ) - v.AddArg2(x, y) + v.copyOf(x) return true } return false } -func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMOVWNE(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAQ2 [c+d] {s} x y) + // match: (CMOVWNE x y (InvertFlags cond)) + // result: (CMOVWNE x y cond) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { - break - } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] + x := v_0 y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + if v_2.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64LEAQ2) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + cond := v_2.Args[0] + v.reset(OpAMD64CMOVWNE) + v.AddArg3(x, y, cond) return true } - // match: (LEAQ2 [c] {s} x (ADDQconst [d] y)) - // cond: is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB - // result: (LEAQ2 [c+2*d] {s} x y) + // match: (CMOVWNE y _ (FlagEQ)) + // result: y for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB) { + y := v_0 + if v_2.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64LEAQ2) - v.AuxInt = int32ToAuxInt(c + 2*d) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(y) return true } - // 
match: (LEAQ2 [c] {s} x z:(ADDQ y y)) - // cond: x != z - // result: (LEAQ4 [c] {s} x y) + // match: (CMOVWNE _ x (FlagGT_UGT)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - z := v_1 - if z.Op != OpAMD64ADDQ { + x := v_1 + if v_2.Op != OpAMD64FlagGT_UGT { break } - y := z.Args[1] - if y != z.Args[0] || !(x != z) { + v.copyOf(x) + return true + } + // match: (CMOVWNE _ x (FlagGT_ULT)) + // result: x + for { + x := v_1 + if v_2.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAQ2 [c] {s} x (SHLQconst [2] y)) - // result: (LEAQ8 [c] {s} x y) + // match: (CMOVWNE _ x (FlagLT_ULT)) + // result: x for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 { + x := v_1 + if v_2.Op != OpAMD64FlagLT_ULT { break } - y := v_1.Args[0] - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) + v.copyOf(x) return true } - // match: (LEAQ2 [0] {s} (ADDQ x x) x) - // cond: s == nil - // result: (SHLQconst [2] x) + // match: (CMOVWNE _ x (FlagLT_UGT)) + // result: x for { - if auxIntToInt32(v.AuxInt) != 0 { + x := v_1 + if v_2.Op != OpAMD64FlagLT_UGT { break } - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQ { + v.copyOf(x) + return true + } + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - x := v_0.Args[1] - if x != v_0.Args[0] || x != v_1 || !(s == nil) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == 
s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SHLQconst) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) - return true + break } - // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB - // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ2) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) - // cond: is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil - // result: (LEAQ4 [off1+2*off2] {sym1} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64LEAQ1 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - y := v_1.Args[1] - if y != v_1.Args[0] || 
!(is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(off1 + 2*off2) - v.Aux = symToAux(sym1) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ2 [off] {sym} x (MOVQconst [scale])) - // cond: is32Bit(int64(off)+int64(scale)*2) - // result: (LEAQ [off+int32(scale)*2] {sym} x) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - scale := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(int64(off) + int64(scale)*2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off + int32(scale)*2) - v.Aux = symToAux(sym) - v.AddArg(x) - return true + break } - // match: (LEAQ2 [off] {sym} x (MOVLconst [scale])) - // cond: is32Bit(int64(off)+int64(scale)*2) - // result: (LEAQ [off+int32(scale)*2] {sym} x) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - 
if v_1.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - scale := auxIntToInt32(v_1.AuxInt) - if !(is32Bit(int64(off) + int64(scale)*2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off + int32(scale)*2) - v.Aux = symToAux(sym) - v.AddArg(x) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAQ4 [c+d] {s} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ4 [c] {s} x (ADDQconst [d] y)) - // cond: is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB - // result: 
(LEAQ4 [c+4*d] {s} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - d := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(c + 4*d) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ4 [c] {s} x z:(ADDQ y y)) - // cond: x != z - // result: (LEAQ8 [c] {s} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - z := v_1 - if z.Op != OpAMD64ADDQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - y := z.Args[1] - if y != z.Args[0] || !(x != z) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(c) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) - // cond: 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB - // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ4) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) - // cond: is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil - // result: (LEAQ8 [off1+4*off2] {sym1} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64LEAQ1 { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - y := v_1.Args[1] - if y != v_1.Args[0] || !(is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op 
!= OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(off1 + 4*off2) - v.Aux = symToAux(sym1) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ4 [off] {sym} x (MOVQconst [scale])) - // cond: is32Bit(int64(off)+int64(scale)*4) - // result: (LEAQ [off+int32(scale)*4] {sym} x) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQF yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - scale := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(int64(off) + int64(scale)*4)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off + int32(scale)*4) - v.Aux = symToAux(sym) - v.AddArg(x) - return true + break } - // match: (LEAQ4 [off] {sym} x (MOVLconst [scale])) - // cond: is32Bit(int64(off)+int64(scale)*4) - // result: (LEAQ [off+int32(scale)*4] {sym} x) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNEF yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - scale := auxIntToInt32(v_1.AuxInt) - if !(is32Bit(int64(off) + int64(scale)*4)) { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 
:= 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNEF) + v.AddArg3(yes, no, flags) + return true + } + break + } + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGTF yes no flags) + for { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off + int32(scale)*4) - v.Aux = symToAux(sym) - v.AddArg(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGTF) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64LEAQ8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y) - // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB - // result: (LEAQ8 [c+d] {s} x y) + // match: (CMOVWNE yes no t:(TESTQ x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGEF yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTQ { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 
|| !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(c + d) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ8 [c] {s} x (ADDQconst [d] y)) - // cond: is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB - // result: (LEAQ8 [c+8*d] {s} x y) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) for { - c := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - d := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(c + 8*d) - v.Aux = symToAux(s) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB - // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - x := v_0.Args[0] - y := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { - break 
+ _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ8) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(x, y) - return true + break } - // match: (LEAQ8 [off] {sym} x (MOVQconst [scale])) - // cond: is32Bit(int64(off)+int64(scale)*8) - // result: (LEAQ [off+int32(scale)*8] {sym} x) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - scale := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(int64(off) + int64(scale)*8)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off + int32(scale)*8) - v.Aux = symToAux(sym) - v.AddArg(x) - return true + break } - // match: (LEAQ8 [off] {sym} x (MOVLconst [scale])) - // cond: is32Bit(int64(off)+int64(scale)*8) - // result: (LEAQ [off+int32(scale)*8] {sym} x) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + yes := v_0 + no 
:= v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - scale := auxIntToInt32(v_1.AuxInt) - if !(is32Bit(int64(off) + int64(scale)*8)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64LEAQ) - v.AuxInt = int32ToAuxInt(off + int32(scale)*8) - v.Aux = symToAux(sym) - v.AddArg(x) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64LoweredPanicBoundsCR(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LoweredPanicBoundsCR [kind] {p} (MOVQconst [c]) mem) - // result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - kind := auxIntToInt64(v.AuxInt) - p := auxToPanicBoundsC(v.Aux) - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt64(v_0.AuxInt) - mem := v_1 - v.reset(OpAMD64LoweredPanicBoundsCC) - v.AuxInt = int64ToAuxInt(kind) - v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c}) - v.AddArg(mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64LoweredPanicBoundsRC(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LoweredPanicBoundsRC [kind] 
{p} (MOVQconst [c]) mem) - // result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - kind := auxIntToInt64(v.AuxInt) - p := auxToPanicBoundsC(v.Aux) - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt64(v_0.AuxInt) - mem := v_1 - v.reset(OpAMD64LoweredPanicBoundsCC) - v.AuxInt = int64ToAuxInt(kind) - v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C}) - v.AddArg(mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64LoweredPanicBoundsRR(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (LoweredPanicBoundsRR [kind] x (MOVQconst [c]) mem) - // result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - kind := auxIntToInt64(v.AuxInt) - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt64(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64LoweredPanicBoundsRC) - v.AuxInt = int64ToAuxInt(kind) - v.Aux = panicBoundsCToAux(PanicBoundsC{C: c}) - v.AddArg2(x, mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + 
continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (LoweredPanicBoundsRR [kind] (MOVQconst [c]) y mem) - // result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - kind := auxIntToInt64(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - c := auxIntToInt64(v_0.AuxInt) - y := v_1 - mem := v_2 - v.reset(OpAMD64LoweredPanicBoundsCR) - v.AuxInt = int64ToAuxInt(kind) - v.Aux = panicBoundsCToAux(PanicBoundsC{C: c}) - v.AddArg2(y, mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBELstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBELstore [i] {s} p x:(BSWAPL w) mem) - // cond: x.Uses == 1 - // result: (MOVLstore [i] {s} p w mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - i := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - p := v_0 - x := v_1 - if x.Op != OpAMD64BSWAPL { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - w := x.Args[0] - mem := v_2 - if !(x.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := 
x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(i) - v.Aux = symToAux(s) - v.AddArg3(p, w, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBEQstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBEQstore [i] {s} p x:(BSWAPQ w) mem) - // cond: x.Uses == 1 - // result: (MOVQstore [i] {s} p w mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - i := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - p := v_0 - x := v_1 - if x.Op != OpAMD64BSWAPQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - w := x.Args[0] - mem := v_2 - if !(x.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVQstore) - v.AuxInt = int32ToAuxInt(i) - v.Aux = symToAux(s) - v.AddArg3(p, w, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBEWstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) - // cond: x.Uses == 1 - // result: (MOVWstore [i] {s} p w mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQF yes no flags) for { - i := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - p := v_0 - x := v_1 - if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 
{ + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - w := x.Args[0] - mem := v_2 - if !(x.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(i) - v.Aux = symToAux(s) - v.AddArg3(p, w, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNEF yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVBload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNEF) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBQSXload [off] {sym} 
ptr mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGTF yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVWload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGTF) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTL x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGEF yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVLload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGEF) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = 
int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVQload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQSX (ANDLconst [c] x)) - // cond: c & 0x80 == 0 - // result: (ANDLconst [c & 0x7f] x) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - if v_0.Op != OpAMD64ANDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(c&0x80 == 0) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + 
return true } - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(c & 0x7f) - v.AddArg(x) - return true + break } - // match: (MOVBQSX (MOVBQSX x)) - // result: (MOVBQSX x) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - if v_0.Op != OpAMD64MOVBQSX { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQSX) - v.AddArg(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBQSXload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVBQSX x) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVBstore { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + 
v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVBQSX) - v.AddArg(x) - return true + break } - // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVBQSXload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) - return true + break } - // match: (MOVBQSXload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVQconst [int64(int8(read8(sym, int64(off))))]) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpSB || !(symIsRO(sym)) { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(int8(read8(sym, int64(off))))) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x 
:= t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVBload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVWload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && 
clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - x := v_0 - if x.Op != OpAMD64MOVLload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBload [off] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - x := v_0 - if x.Op != 
OpAMD64MOVQload { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - // match: (MOVBQZX (ANDLconst [c] x)) - // result: (ANDLconst [c & 0xff] x) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWEQF yes no flags) for { - if v_0.Op != OpAMD64ANDLconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(c & 0xff) - v.AddArg(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQF) + v.AddArg3(yes, no, flags) + return true + } + break } - // match: (MOVBQZX (MOVBQZX x)) - // result: (MOVBQZX x) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWNEF yes no flags) for { - if v_0.Op != OpAMD64MOVBQZX { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - 
v.reset(OpAMD64MOVBQZX) - v.AddArg(x) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNEF) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVBatomicload [off1+off2] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: (CMOVWGTF yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGTF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVBatomicload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - // match: (MOVBatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVBatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem) + // match: (CMOVWNE yes no t:(TESTW x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + 
// result: (CMOVWGEF yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTW { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVBatomicload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVBQZX x) + // match: (CMOVWNE yes no t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWEQ yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVBstore { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(yes, no, flags) + return true } - 
v.reset(OpAMD64MOVBQZX) - v.AddArg(x) - return true + break } - // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVBload [off1+off2] {sym} ptr mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWNE yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVBload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWLT yes no flags) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue 
+ } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64MOVBload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) - return true + break } - // match: (MOVBload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int32(read8(sym, int64(off)))]) + // match: (CMOVWNE yes no t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWGT yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpSB || !(symIsRO(sym)) { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(read8(sym, int64(off)))) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(yes, no, flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem) - // cond: y.Uses == 1 - // result: (SETLstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWLE yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETL { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(yes, no, 
flags) + return true } - v.reset(OpAMD64SETLstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem) - // cond: y.Uses == 1 - // result: (SETLEstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWGE yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETLE { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETLEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem) - // cond: y.Uses == 1 - // result: (SETGstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWHI yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETG { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETGstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem) - // cond: y.Uses == 1 - // result: (SETGEstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWCS yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETGE { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETGEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem) - // cond: y.Uses == 1 - // result: (SETEQstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWCC yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETEQ { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETEQstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem) - // 
cond: y.Uses == 1 - // result: (SETNEstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWLS yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETNE { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETNEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem) - // cond: y.Uses == 1 - // result: (SETBstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETEQF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWEQF yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETB { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWEQF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem) - // cond: y.Uses == 1 - // result: (SETBEstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB 
s:(SETNEF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWNEF yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETBE { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWNEF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETBEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem) - // cond: y.Uses == 1 - // result: (SETAstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETGF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWGTF yes no flags) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETA { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGTF) + v.AddArg3(yes, no, flags) + return true } - v.reset(OpAMD64SETAstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + break } - // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem) - // cond: y.Uses == 1 - // result: (SETAEstore [off] {sym} ptr x mem) + // match: (CMOVWNE yes no t:(TESTB s:(SETGEF flags) s)) + // cond: t.Block == s.Block + // result: (CMOVWGEF yes no flags) for { - off := 
auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SETAE { + yes := v_0 + no := v_1 + t := v_2 + if t.Op != OpAMD64TESTB { break } - x := y.Args[0] - mem := v_2 - if !(y.Uses == 1) { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64CMOVWGEF) + v.AddArg3(yes, no, flags) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64CMPB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CMPB x (MOVLconst [c])) + // result: (CMPBconst x [int8(c)]) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64CMPBconst) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) return true } - // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem) - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (CMPB (MOVLconst [c]) x) + // result: (InvertFlags (CMPBconst x [int8(c)])) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVBQSX { + if v_0.Op != OpAMD64MOVLconst { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(c)) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem) - // result: (MOVBstore [off] {sym} ptr x mem) + // match: (CMPB x y) + // cond: canonLessThan(x,y) + // result: (InvertFlags (CMPB y x)) for { - off := auxIntToInt32(v.AuxInt) - sym := 
auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVBQZX { + x := v_0 + y := v_1 + if !(canonLessThan(x, y)) { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } - // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVBstore [off1+off2] {sym} ptr val mem) + // match: (CMPB l:(MOVBload {sym} [off] ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (CMPBload {sym} [off] ptr x mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + l := v_0 + if l.Op != OpAMD64MOVBload { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64CMPBload) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) - // result: (MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem) + // match: (CMPB x l:(MOVBload {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (InvertFlags (CMPBload {sym} [off] ptr x mem)) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVBload { break } - c := auxIntToInt32(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) - v.Aux = symToAux(sym) - 
v.AddArg2(ptr, mem) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(l.Pos, OpAMD64CMPBload, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg3(ptr, x, mem) + v.AddArg(v0) return true } - // match: (MOVBstore [off] {sym} ptr (MOVQconst [c]) mem) - // result: (MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPBconst(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (CMPBconst (MOVLconst [x]) [y]) + // cond: int8(x)==y + // result: (FlagEQ) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - c := auxIntToInt64(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + x := auxIntToInt32(v_0.AuxInt) + if !(int8(x) == y) { + break + } + v.reset(OpAMD64FlagEQ) return true } - // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (CMPBconst (MOVLconst [x]) [y]) + // cond: int8(x)uint8(y) + // result: (FlagLT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64KMOVBi { + y := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - mask := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64KMOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + x := auxIntToInt32(v_0.AuxInt) + if !(int8(x) < y && uint8(x) > uint8(y)) { + break + } + v.reset(OpAMD64FlagLT_UGT) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd32(off) - // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) + // match: (CMPBconst (MOVLconst [x]) [y]) + // cond: int8(x)>y && uint8(x) y && uint8(x) < uint8(y)) { break } - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) - v.Aux = symToAux(s) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_ULT) return true } - // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) - // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (CMPBconst (MOVLconst [x]) [y]) + // cond: int8(x)>y && uint8(x)>uint8(y) + // result: (FlagGT_UGT) for { - sc := auxIntToValAndOff(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + y := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - off := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { + x := auxIntToInt32(v_0.AuxInt) + if !(int8(x) > y && uint8(x) > uint8(y)) { break } - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_UGT) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) + // match: (CMPBconst (ANDLconst _ [m]) [n]) + // cond: 0 <= int8(m) && int8(m) < n + // result: (FlagLT_ULT) for { - x := v_0 - if x.Op != OpAMD64MOVLload { + n := auxIntToInt8(v.AuxInt) + if v_0.Op != 
OpAMD64ANDLconst { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + m := auxIntToInt32(v_0.AuxInt) + if !(0 <= int8(m) && int8(m) < n) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64FlagLT_ULT) return true } - // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) + // match: (CMPBconst a:(ANDL x y) [0]) + // cond: a.Uses == 1 + // result: (TESTB x y) for { - x := v_0 - if x.Op != OpAMD64MOVQload { + if auxIntToInt8(v.AuxInt) != 0 { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + a := v_0 + if a.Op != OpAMD64ANDL { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + y := a.Args[1] + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64TESTB) + v.AddArg2(x, y) return true } - // match: (MOVLQSX (ANDLconst [c] x)) - // cond: uint32(c) & 0x80000000 == 0 - // result: (ANDLconst [c & 0x7fffffff] x) + // match: (CMPBconst a:(ANDLconst [c] x) [0]) + // cond: a.Uses == 1 + // result: (TESTBconst [int8(c)] x) for { - if v_0.Op != OpAMD64ANDLconst { + if auxIntToInt8(v.AuxInt) != 0 { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(uint32(c)&0x80000000 == 0) { + a := v_0 + if a.Op != OpAMD64ANDLconst { break } - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(c & 0x7fffffff) + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64TESTBconst) + v.AuxInt = int8ToAuxInt(int8(c)) v.AddArg(x) return true } - // match: (MOVLQSX (MOVLQSX x)) - // result: (MOVLQSX x) + 
// match: (CMPBconst x [0]) + // result: (TESTB x x) for { - if v_0.Op != OpAMD64MOVLQSX { + if auxIntToInt8(v.AuxInt) != 0 { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVLQSX) - v.AddArg(x) + x := v_0 + v.reset(OpAMD64TESTB) + v.AddArg2(x, x) return true } - // match: (MOVLQSX (MOVWQSX x)) - // result: (MOVWQSX x) + // match: (CMPBconst l:(MOVBload {sym} [off] ptr mem) [c]) + // cond: l.Uses == 1 && clobber(l) + // result: @l.Block (CMPBconstload {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if v_0.Op != OpAMD64MOVWQSX { + c := auxIntToInt8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64MOVBload { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQSX) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(l.Uses == 1 && clobber(l)) { + break + } + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPBconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (MOVLQSX (MOVBQSX x)) - // result: (MOVBQSX x) + return false +} +func rewriteValueAMD64_OpAMD64CMPBconstload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMPBconstload [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (CMPBconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - if v_0.Op != OpAMD64MOVBQSX { + valoff1 := auxIntToValAndOff(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQSX) - v.AddArg(x) + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2)) { + break + } + v.reset(OpAMD64CMPBconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(base, mem) + return true + } + // match: (CMPBconstload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: 
ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (CMPBconstload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + for { + valoff1 := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64CMPBconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPBload(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVLQSX x) + // match: (CMPBload [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (CMPBload [off1+off2] {sym} base val mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLstore { + if v_0.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVLQSX) - v.AddArg(x) + v.reset(OpAMD64CMPBload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // match: (CMPBload [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // 
cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // result: (CMPBload [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -16147,385 +21019,372 @@ func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] - mem := v_1 + val := v_1 + mem := v_2 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVLQSXload) + v.reset(OpAMD64CMPBload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + v.AddArg3(base, val, mem) return true } - // match: (MOVLQSXload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVQconst [int64(int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))]) + // match: (CMPBload {sym} [off] ptr (MOVLconst [c]) mem) + // result: (CMPBconstload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpSB || !(symIsRO(sym)) { + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64CMPBconstload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPL(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLload [off] {sym} ptr mem) + // match: (CMPL x (MOVLconst [c])) + // result: (CMPLconst x [c]) for { x := v_0 - if x.Op != OpAMD64MOVLload { - break - } - off := 
auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + if v_1.Op != OpAMD64MOVLconst { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64CMPLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) return true } - // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLload [off] {sym} ptr mem) + // match: (CMPL (MOVLconst [c]) x) + // result: (InvertFlags (CMPLconst x [c])) for { - x := v_0 - if x.Op != OpAMD64MOVQload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + if v_0.Op != OpAMD64MOVLconst { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(c) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVLQZX (ANDLconst [c] x)) - // result: (ANDLconst [c] x) + // match: (CMPL x y) + // cond: canonLessThan(x,y) + // result: (InvertFlags (CMPL y x)) for { - if v_0.Op != OpAMD64ANDLconst { + x := v_0 + y := v_1 + if !(canonLessThan(x, y)) { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } - // match: (MOVLQZX (MOVLQZX x)) - // result: (MOVLQZX x) + // match: (CMPL l:(MOVLload {sym} [off] ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (CMPLload {sym} [off] ptr x 
mem) for { - if v_0.Op != OpAMD64MOVLQZX { + l := v_0 + if l.Op != OpAMD64MOVLload { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVLQZX) - v.AddArg(x) - return true - } - // match: (MOVLQZX (MOVWQZX x)) - // result: (MOVWQZX x) - for { - if v_0.Op != OpAMD64MOVWQZX { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQZX) - v.AddArg(x) + v.reset(OpAMD64CMPLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVLQZX (MOVBQZX x)) - // result: (MOVBQZX x) + // match: (CMPL x l:(MOVLload {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (InvertFlags (CMPLload {sym} [off] ptr x mem)) for { - if v_0.Op != OpAMD64MOVBQZX { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVLload { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQZX) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(l.Pos, OpAMD64CMPLload, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg3(ptr, x, mem) + v.AddArg(v0) return true } return false } -func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64CMPLconst(v *Value) bool { v_0 := v.Args[0] - // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVLatomicload [off1+off2] {sym} ptr mem) + b := v.Block + // match: (CMPLconst (MOVLconst [x]) [y]) + // cond: x==y + // result: (FlagEQ) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + y := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := 
v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + x := auxIntToInt32(v_0.AuxInt) + if !(x == y) { break } - v.reset(OpAMD64MOVLatomicload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagEQ) return true } - // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVLatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem) + // match: (CMPLconst (MOVLconst [x]) [y]) + // cond: x (Arg [off] {sym})) - // cond: t.Size() == u.Size() - // result: @b.Func.Entry (Arg [off] {sym}) + // match: (CMPLconst (MOVLconst [x]) [y]) + // cond: xuint32(y) + // result: (FlagLT_UGT) for { - t := v.Type - if v_0.Op != OpArg { + y := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - u := v_0.Type - off := auxIntToInt32(v_0.AuxInt) - sym := auxToSym(v_0.Aux) - if !(t.Size() == u.Size()) { + x := auxIntToInt32(v_0.AuxInt) + if !(x < y && uint32(x) > uint32(y)) { break } - b = b.Func.Entry - v0 := b.NewValue0(v.Pos, OpArg, t) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) + v.reset(OpAMD64FlagLT_UGT) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLi2f(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVLi2f (Arg [off] {sym})) - // cond: t.Size() == u.Size() - // result: @b.Func.Entry (Arg [off] {sym}) + // match: (CMPLconst (MOVLconst [x]) [y]) + // cond: x>y && uint32(x) y && uint32(x) < uint32(y)) { break } - b = b.Func.Entry - v0 := b.NewValue0(v.Pos, OpArg, t) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) + v.reset(OpAMD64FlagGT_ULT) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 
&& isSamePtr(ptr, ptr2) - // result: (MOVLQZX x) + // match: (CMPLconst (MOVLconst [x]) [y]) + // cond: x>y && uint32(x)>uint32(y) + // result: (FlagGT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLstore { + y := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + x := auxIntToInt32(v_0.AuxInt) + if !(x > y && uint32(x) > uint32(y)) { break } - v.reset(OpAMD64MOVLQZX) - v.AddArg(x) + v.reset(OpAMD64FlagGT_UGT) return true } - // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVLload [off1+off2] {sym} ptr mem) + // match: (CMPLconst (SHRLconst _ [c]) [n]) + // cond: 0 <= n && 0 < c && c <= 32 && (1< [off] {sym} (SB) _) - // cond: symIsRO(sym) && is32BitInt(t) - // result: (MOVLconst [int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) + // match: (CMPLconst a:(ANDLconst [c] x) [0]) + // cond: a.Uses == 1 + // result: (TESTLconst [c] x) for { - t := v.Type - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpSB || !(symIsRO(sym) && is32BitInt(t)) { + if auxIntToInt32(v.AuxInt) != 0 { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))) + a := v_0 + if a.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64TESTLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) return true } - // match: (MOVLload [off] {sym} (SB) _) - // cond: symIsRO(sym) && is64BitInt(t) - // result: (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) + // match: (CMPLconst x [0]) + // result: (TESTL x x) for { - t := v.Type - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if 
v_0.Op != OpSB || !(symIsRO(sym) && is64BitInt(t)) { + if auxIntToInt32(v.AuxInt) != 0 { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))) + x := v_0 + v.reset(OpAMD64TESTL) + v.AddArg2(x, x) + return true + } + // match: (CMPLconst l:(MOVLload {sym} [off] ptr mem) [c]) + // cond: l.Uses == 1 && clobber(l) + // result: @l.Block (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem) + for { + c := auxIntToInt32(v.AuxInt) + l := v_0 + if l.Op != OpAMD64MOVLload { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(l.Uses == 1 && clobber(l)) { + break + } + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPLconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(c, off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64CMPLconstload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) - // result: (MOVLstore [off] {sym} ptr x mem) + // match: (CMPLconstload [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (CMPLconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - off := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLQSX { + if v_0.Op != OpAMD64ADDQconst { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(off) + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2)) { + break + } + v.reset(OpAMD64CMPLconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.AddArg2(base, mem) return true } - // 
match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem) - // result: (MOVLstore [off] {sym} ptr x mem) + // match: (CMPLconstload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (CMPLconstload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLQZX { + valoff1 := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64CMPLconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMPLload [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVLstore [off1+off2] {sym} ptr val mem) + // result: (CMPLload [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -16533,55 +21392,21 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] + base := v_0.Args[0] val := v_1 mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVLstore) + v.reset(OpAMD64CMPLload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) 
mem) - // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true - } - // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem) - // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(base, val, mem) return true } - // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (CMPLload [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (CMPLload [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -16596,815 +21421,476 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVLstore) + v.reset(OpAMD64CMPLload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg3(base, val, mem) return true } - // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ADDLmodify [off] {sym} ptr x mem) + // match: (CMPLload {sym} [off] ptr (MOVLconst [c]) mem) + // result: (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := 
v_0 - y := v_1 - if y.Op != OpAMD64ADDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { + if v_1.Op != OpAMD64MOVLconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { - break - } - v.reset(OpAMD64ADDLmodify) - v.AuxInt = int32ToAuxInt(off) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64CMPLconstload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(c, off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ANDLmodify [off] {sym} ptr x mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CMPQ x (MOVQconst [c])) + // cond: is32Bit(c) + // result: (CMPQconst x [int32(c)]) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ANDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c)) { break } - v.reset(OpAMD64ANDLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64CMPQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) return true } - // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ORLmodify [off] {sym} ptr x mem) + // match: (CMPQ (MOVQconst [c]) x) + // cond: is32Bit(c) + // result: (InvertFlags (CMPQconst x [int32(c)])) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { + if v_0.Op != 
OpAMD64MOVQconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + c := auxIntToInt64(v_0.AuxInt) + x := v_1 + if !(is32Bit(c)) { break } - v.reset(OpAMD64ORLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(int32(c)) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (XORLmodify [off] {sym} ptr x mem) + // match: (CMPQ x y) + // cond: canonLessThan(x,y) + // result: (InvertFlags (CMPQ y x)) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 + x := v_0 y := v_1 - if y.Op != OpAMD64XORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { - break - } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + if !(canonLessThan(x, y)) { break } - v.reset(OpAMD64XORLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } - // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ADDLmodify [off] {sym} ptr x mem) + // match: (CMPQ (MOVQconst [x]) (MOVQconst [y])) + // cond: x==y + // result: (FlagEQ) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ADDL { + if v_0.Op != OpAMD64MOVQconst { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - 
mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ADDLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + x := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpAMD64MOVQconst { + break } - break + y := auxIntToInt64(v_1.AuxInt) + if !(x == y) { + break + } + v.reset(OpAMD64FlagEQ) + return true } - // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (SUBLmodify [off] {sym} ptr x mem) + // match: (CMPQ (MOVQconst [x]) (MOVQconst [y])) + // cond: xuint64(y) + // result: (FlagLT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ANDL { + if v_0.Op != OpAMD64MOVQconst { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ANDLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ORLmodify [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ORL { + x := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpAMD64MOVQconst { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || 
auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ORLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (XORLmodify [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64XORL { + y := auxIntToInt64(v_1.AuxInt) + if !(x < y && uint64(x) > uint64(y)) { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64XORLmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - break + v.reset(OpAMD64FlagLT_UGT) + return true } - // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (ADDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPQ (MOVQconst [x]) (MOVQconst [y])) + // cond: x>y && uint64(x) y && uint64(x) < uint64(y)) { break } - v.reset(OpAMD64ADDLconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_ULT) return true } - // match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && 
clobber(l, a) - // result: (ANDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPQ (MOVQconst [x]) (MOVQconst [y])) + // cond: x>y && uint64(x)>uint64(y) + // result: (FlagGT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ANDLconst { + if v_0.Op != OpAMD64MOVQconst { break } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + x := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpAMD64MOVQconst { break } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + y := auxIntToInt64(v_1.AuxInt) + if !(x > y && uint64(x) > uint64(y)) { break } - v.reset(OpAMD64ANDLconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_UGT) return true } - // match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (ORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (CMPQload {sym} [off] ptr x mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ORLconst { - break - } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + l := v_0 + if l.Op != OpAMD64MOVQload { break } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64ORLconstmodify) - 
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.reset(OpAMD64CMPQload) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (XORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPQ x l:(MOVQload {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (InvertFlags (CMPQload {sym} [off] ptr x mem)) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64XORLconst { - break - } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVQload { break } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XORLconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(l.Pos, OpAMD64CMPQload, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg3(ptr, x, mem) + v.AddArg(v0) return true } - // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) - // result: (MOVSSstore [off] {sym} ptr val mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPQconst(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (CMPQconst (MOVQconst [x]) [y]) + // cond: x==int64(y) + // result: (FlagEQ) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLf2i { + y := auxIntToInt32(v.AuxInt) + if 
v_0.Op != OpAMD64MOVQconst { break } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVSSstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + x := auxIntToInt64(v_0.AuxInt) + if !(x == int64(y)) { + break + } + v.reset(OpAMD64FlagEQ) return true } - // match: (MOVLstore [i] {s} p x:(BSWAPL w) mem) - // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3 - // result: (MOVBELstore [i] {s} p w mem) + // match: (CMPQconst (MOVQconst [x]) [y]) + // cond: x= 3) { + x := auxIntToInt64(v_0.AuxInt) + if !(x < int64(y) && uint64(x) < uint64(int64(y))) { break } - v.reset(OpAMD64MOVBELstore) - v.AuxInt = int32ToAuxInt(i) - v.Aux = symToAux(s) - v.AddArg3(p, w, mem) + v.reset(OpAMD64FlagLT_ULT) return true } - // match: (MOVLstore [off] {sym} ptr (KMOVDi mask) mem) - // result: (KMOVDstore [off] {sym} ptr mask mem) + // match: (CMPQconst (MOVQconst [x]) [y]) + // cond: xuint64(int64(y)) + // result: (FlagLT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64KMOVDi { + y := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - mask := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64KMOVDstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + x := auxIntToInt64(v_0.AuxInt) + if !(x < int64(y) && uint64(x) > uint64(int64(y))) { + break + } + v.reset(OpAMD64FlagLT_UGT) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd32(off) - // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) + // match: (CMPQconst (MOVQconst [x]) [y]) + // cond: x>int64(y) && uint64(x) int64(y) && uint64(x) < uint64(int64(y))) { break } - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) - v.Aux = symToAux(s) - v.AddArg2(ptr, mem) + 
v.reset(OpAMD64FlagGT_ULT) return true } - // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) - // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (CMPQconst (MOVQconst [x]) [y]) + // cond: x>int64(y) && uint64(x)>uint64(int64(y)) + // result: (FlagGT_UGT) for { - sc := auxIntToValAndOff(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + y := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - off := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { + x := auxIntToInt64(v_0.AuxInt) + if !(x > int64(y) && uint64(x) > uint64(int64(y))) { break } - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_UGT) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVOload [off1+off2] {sym} ptr mem) + // match: (CMPQconst (MOVBQZX _) [c]) + // cond: 0xFF < c + // result: (FlagLT_ULT) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVBQZX || !(0xFF < c) { break } - v.reset(OpAMD64MOVOload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagLT_ULT) return true } - // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: 
(MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (CMPQconst (MOVWQZX _) [c]) + // cond: 0xFFFF < c + // result: (FlagLT_ULT) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVWQZX || !(0xFFFF < c) { break } - v.reset(OpAMD64MOVOload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + v.reset(OpAMD64FlagLT_ULT) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - typ := &b.Func.Config.Types - // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVOstore [off1+off2] {sym} ptr val mem) + // match: (CMPQconst (SHRQconst _ [c]) [n]) + // cond: 0 <= n && 0 < c && c <= 64 && (1< (Arg [off] {sym})) - // cond: t.Size() == u.Size() - // result: @b.Func.Entry (Arg [off] {sym}) + // match: (CMPQconst x [0]) + // result: (TESTQ x x) for { - t := v.Type - if v_0.Op != OpArg { - break - } - u := v_0.Type - off := auxIntToInt32(v_0.AuxInt) - sym := auxToSym(v_0.Aux) - if !(t.Size() == u.Size()) { + if auxIntToInt32(v.AuxInt) != 0 { break } - b = b.Func.Entry - v0 := b.NewValue0(v.Pos, OpArg, t) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) + x := v_0 + v.reset(OpAMD64TESTQ) + v.AddArg2(x, x) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVQi2f (Arg [off] {sym})) - // cond: t.Size() == u.Size() - // result: @b.Func.Entry (Arg [off] {sym}) + // match: (CMPQconst l:(MOVQload {sym} [off] ptr mem) [c]) + // cond: 
l.Uses == 1 && clobber(l) + // result: @l.Block (CMPQconstload {sym} [makeValAndOff(c,off)] ptr mem) for { - t := v.Type - if v_0.Op != OpArg { + c := auxIntToInt32(v.AuxInt) + l := v_0 + if l.Op != OpAMD64MOVQload { break } - u := v_0.Type - off := auxIntToInt32(v_0.AuxInt) - sym := auxToSym(v_0.Aux) - if !(t.Size() == u.Size()) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(l.Uses == 1 && clobber(l)) { break } - b = b.Func.Entry - v0 := b.NewValue0(v.Pos, OpArg, t) + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPQconstload, types.TypeFlags) v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(c, off)) v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPQconstload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: x - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQstore { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break - } - v.copyOf(x) - return true - } - // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVQload [off1+off2] {sym} ptr mem) + // match: (CMPQconstload [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (CMPQconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - off1 := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym := auxToSym(v.Aux) if v_0.Op != OpAMD64ADDQconst { break } off2 := 
auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] + base := v_0.Args[0] mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + if !(ValAndOff(valoff1).canAdd32(off2)) { break } - v.reset(OpAMD64MOVQload) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64CMPQconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg2(base, mem) return true } - // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (CMPQconstload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (CMPQconstload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - off1 := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym1 := auxToSym(v.Aux) if v_0.Op != OpAMD64LEAQ { break @@ -17413,54 +21899,24 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVQload) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64CMPQconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg2(base, mem) return true } - // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) - // result: (MOVQf2i val) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVSDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { - break - } - val := v_1.Args[1] - if ptr != v_1.Args[0] { - break - } - v.reset(OpAMD64MOVQf2i) - v.AddArg(val) - return true - } - // match: (MOVQload [off] {sym} (SB) _) - 
// cond: symIsRO(sym) - // result: (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))]) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpSB || !(symIsRO(sym)) { - break - } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))) - return true - } return false } -func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPQload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // match: (CMPQload [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVQstore [off1+off2] {sym} ptr val mem) + // result: (CMPQload [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -17468,42 +21924,21 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] + base := v_0.Args[0] val := v_1 mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVQstore) + v.reset(OpAMD64CMPQload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) - // cond: validVal(c) - // result: (MOVQstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mem := v_2 - if !(validVal(c)) { - break - } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(base, val, mem) return true } - // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (CMPQload [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (CMPQload [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -17518,616 +21953,356 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVQstore) + v.reset(OpAMD64CMPQload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg3(base, val, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ADDQmodify [off] {sym} ptr x mem) + // match: (CMPQload {sym} [off] ptr (MOVQconst [c]) mem) + // cond: validVal(c) + // result: (CMPQconstload {sym} [makeValAndOff(int32(c),off)] ptr mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - y := v_1 - if y.Op != OpAMD64ADDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { + if v_1.Op != OpAMD64MOVQconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + if !(validVal(c)) { break } - v.reset(OpAMD64ADDQmodify) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64CMPQconstload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ANDQmodify [off] {sym} ptr x mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CMPW x (MOVLconst [c])) + // result: (CMPWconst x [int16(c)]) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - 
if y.Op != OpAMD64ANDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { - break - } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64ANDQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64CMPWconst) + v.AuxInt = int16ToAuxInt(int16(c)) + v.AddArg(x) return true } - // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ORQmodify [off] {sym} ptr x mem) + // match: (CMPW (MOVLconst [c]) x) + // result: (InvertFlags (CMPWconst x [int16(c)])) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { - break - } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + if v_0.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64ORQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v0.AuxInt = int16ToAuxInt(int16(c)) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (XORQmodify [off] {sym} ptr x mem) + // match: (CMPW x y) + // cond: canonLessThan(x,y) + // result: (InvertFlags (CMPW y x)) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 + x := v_0 y := v_1 - if y.Op != OpAMD64XORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { - break - } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + if !(canonLessThan(x, y)) { 
break } - v.reset(OpAMD64XORQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } - // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ADDQmodify [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ADDQ { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ADDQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (SUBQmodify [off] {sym} ptr x mem) + // match: (CMPW l:(MOVWload {sym} [off] ptr mem) x) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (CMPWload {sym} [off] ptr x mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SUBQ { - break - } - x := y.Args[1] - l := y.Args[0] - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + l := v_0 + if l.Op != OpAMD64MOVWload { break } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) mem := l.Args[1] - if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + ptr := l.Args[0] + x := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SUBQmodify) + 
v.reset(OpAMD64CMPWload) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(ptr, x, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ANDQmodify [off] {sym} ptr x mem) + // match: (CMPW x l:(MOVWload {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (InvertFlags (CMPWload {sym} [off] ptr x mem)) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ANDQ { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVWload { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ANDQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ORQmodify [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ORQ { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - 
v.reset(OpAMD64ORQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - break + v.reset(OpAMD64InvertFlags) + v0 := b.NewValue0(l.Pos, OpAMD64CMPWload, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg3(ptr, x, mem) + v.AddArg(v0) + return true } - // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (XORQmodify [off] {sym} ptr x mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPWconst(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (CMPWconst (MOVLconst [x]) [y]) + // cond: int16(x)==y + // result: (FlagEQ) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - y := v_1 - if y.Op != OpAMD64XORQ { + y := auxIntToInt16(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64XORQmodify) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true + x := auxIntToInt32(v_0.AuxInt) + if !(int16(x) == y) { + break } - break + v.reset(OpAMD64FlagEQ) + return true } - // match: (MOVQstore {sym} [off] ptr x:(BTSQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) - // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l) - // result: (BTSQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPWconst (MOVLconst [x]) [y]) + // cond: int16(x)uint16(y) + // result: (FlagLT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - x := v_1 - if x.Op != OpAMD64BTRQconst { - break - } - c := 
auxIntToInt8(x.AuxInt) - l := x.Args[0] - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + y := auxIntToInt16(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - mem := l.Args[1] - if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) { + x := auxIntToInt32(v_0.AuxInt) + if !(int16(x) < y && uint16(x) > uint16(y)) { break } - v.reset(OpAMD64BTRQconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagLT_UGT) return true } - // match: (MOVQstore {sym} [off] ptr x:(BTCQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) - // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l) - // result: (BTCQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPWconst (MOVLconst [x]) [y]) + // cond: int16(x)>y && uint16(x) y && uint16(x) < uint16(y)) { break } - v.reset(OpAMD64BTCQconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_ULT) return true } - // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPWconst (MOVLconst [x]) [y]) + // cond: int16(x)>y && uint16(x)>uint16(y) + // result: (FlagGT_UGT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ADDQconst { - break - } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + y := auxIntToInt16(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + x := auxIntToInt32(v_0.AuxInt) + if !(int16(x) 
> y && uint16(x) > uint16(y)) { break } - v.reset(OpAMD64ADDQconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagGT_UGT) return true } - // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (ANDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPWconst (ANDLconst _ [m]) [n]) + // cond: 0 <= int16(m) && int16(m) < n + // result: (FlagLT_ULT) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ANDQconst { - break - } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + n := auxIntToInt16(v.AuxInt) + if v_0.Op != OpAMD64ANDLconst { break } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + m := auxIntToInt32(v_0.AuxInt) + if !(0 <= int16(m) && int16(m) < n) { break } - v.reset(OpAMD64ANDQconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64FlagLT_ULT) return true } - // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (ORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPWconst a:(ANDL x y) [0]) + // cond: a.Uses == 1 + // result: (TESTW x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ORQconst { + if auxIntToInt16(v.AuxInt) != 0 { break } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + a := v_0 + if a.Op != 
OpAMD64ANDL { break } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + y := a.Args[1] + x := a.Args[0] + if !(a.Uses == 1) { break } - v.reset(OpAMD64ORQconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64TESTW) + v.AddArg2(x, y) return true } - // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) - // result: (XORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) + // match: (CMPWconst a:(ANDLconst [c] x) [0]) + // cond: a.Uses == 1 + // result: (TESTWconst [int16(c)] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - a := v_1 - if a.Op != OpAMD64XORQconst { + if auxIntToInt16(v.AuxInt) != 0 { break } - c := auxIntToInt32(a.AuxInt) - l := a.Args[0] - if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + a := v_0 + if a.Op != OpAMD64ANDLconst { break } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { break } - v.reset(OpAMD64XORQconstmodify) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64TESTWconst) + v.AuxInt = int16ToAuxInt(int16(c)) + v.AddArg(x) return true } - // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) - // result: (MOVSDstore [off] {sym} ptr val mem) + // match: (CMPWconst x [0]) + // result: (TESTW x x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQf2i { + if auxIntToInt16(v.AuxInt) != 0 { break } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVSDstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = 
symToAux(sym) - v.AddArg3(ptr, val, mem) + x := v_0 + v.reset(OpAMD64TESTW) + v.AddArg2(x, x) return true } - // match: (MOVQstore [i] {s} p x:(BSWAPQ w) mem) - // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3 - // result: (MOVBEQstore [i] {s} p w mem) + // match: (CMPWconst l:(MOVWload {sym} [off] ptr mem) [c]) + // cond: l.Uses == 1 && clobber(l) + // result: @l.Block (CMPWconstload {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - i := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - p := v_0 - x := v_1 - if x.Op != OpAMD64BSWAPQ { - break - } - w := x.Args[0] - mem := v_2 - if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) { + c := auxIntToInt16(v.AuxInt) + l := v_0 + if l.Op != OpAMD64MOVWload { break } - v.reset(OpAMD64MOVBEQstore) - v.AuxInt = int32ToAuxInt(i) - v.Aux = symToAux(s) - v.AddArg3(p, w, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr (KMOVQi mask) mem) - // result: (KMOVQstore [off] {sym} ptr mask mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64KMOVQi { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(l.Uses == 1 && clobber(l)) { break } - mask := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64KMOVQstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPWconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPWconstload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd32(off) - // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) + // match: (CMPWconstload [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: 
ValAndOff(valoff1).canAdd32(off2) + // result: (CMPWconstload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - sc := auxIntToValAndOff(v.AuxInt) - s := auxToSym(v.Aux) + valoff1 := auxIntToValAndOff(v.AuxInt) + sym := auxToSym(v.Aux) if v_0.Op != OpAMD64ADDQconst { break } - off := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] mem := v_1 - if !(ValAndOff(sc).canAdd32(off)) { + if !(ValAndOff(valoff1).canAdd32(off2)) { break } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) - v.Aux = symToAux(s) - v.AddArg2(ptr, mem) + v.reset(OpAMD64CMPWconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(base, mem) return true } - // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) - // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (CMPWconstload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (CMPWconstload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - sc := auxIntToValAndOff(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym1 := auxToSym(v.Aux) if v_0.Op != OpAMD64LEAQ { break } - off := auxIntToInt32(v_0.AuxInt) + off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] + base := v_0.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.reset(OpAMD64CMPWconstload) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(ptr, mem) - return true - } - // 
match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) - // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) - for { - c := auxIntToValAndOff(v.AuxInt) - s := auxToSym(v.Aux) - p1 := v_0 - x := v_1 - if x.Op != OpAMD64MOVQstoreconst { - break - } - a := auxIntToValAndOff(x.AuxInt) - if auxToSym(x.Aux) != s { - break - } - mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) { - break - } - v.reset(OpAMD64MOVOstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off())) - v.Aux = symToAux(s) - v.AddArg2(p0, mem) - return true - } - // match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem)) - // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) - // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) - for { - a := auxIntToValAndOff(v.AuxInt) - s := auxToSym(v.Aux) - p0 := v_0 - x := v_1 - if x.Op != OpAMD64MOVQstoreconst { - break - } - c := auxIntToValAndOff(x.AuxInt) - if auxToSym(x.Aux) != s { - break - } - mem := x.Args[1] - p1 := x.Args[0] - if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) { - break - } - v.reset(OpAMD64MOVOstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off())) - v.Aux = symToAux(s) - v.AddArg2(p0, mem) + v.AddArg2(base, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPWload(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem) + // match: (CMPWload [off1] {sym} 
(ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVSDload [off1+off2] {sym} ptr mem) + // result: (CMPWload [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -18135,20 +22310,21 @@ func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 + base := v_0.Args[0] + val := v_1 + mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVSDload) + v.reset(OpAMD64CMPWload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(base, val, mem) return true } - // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // match: (CMPWload [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // result: (CMPWload [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -18158,44 +22334,44 @@ func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] - mem := v_1 + val := v_1 + mem := v_2 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVSDload) + v.reset(OpAMD64CMPWload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + v.AddArg3(base, val, mem) return true } - // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) - // result: (MOVQi2f val) + // match: (CMPWload {sym} [off] ptr (MOVLconst [c]) mem) + // result: (CMPWconstload {sym} [makeValAndOff(int32(int16(c)),off)] ptr mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64MOVQstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { - break - } - val := 
v_1.Args[1] - if ptr != v_1.Args[0] { + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64MOVQi2f) - v.AddArg(val) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64CMPWconstload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64CMPXCHGLlock(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // match: (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVSDstore [off1+off2] {sym} ptr val mem) + // result: (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -18204,706 +22380,810 @@ func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool { } off2 := auxIntToInt32(v_0.AuxInt) ptr := v_0.Args[0] - val := v_1 - mem := v_2 + old := v_1 + new_ := v_2 + mem := v_3 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVSDstore) + v.reset(OpAMD64CMPXCHGLlock) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + v.AddArg4(ptr, old, new_, mem) return true } - // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + return false +} +func rewriteValueAMD64_OpAMD64CMPXCHGQlock(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem) for { off1 := auxIntToInt32(v.AuxInt) - sym1 := 
auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + ptr := v_0.Args[0] + old := v_1 + new_ := v_2 + mem := v_3 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVSDstore) + v.reset(OpAMD64CMPXCHGQlock) v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.Aux = symToAux(sym) + v.AddArg4(ptr, old, new_, mem) return true } - // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) - // result: (MOVQstore [off] {sym} ptr val mem) + return false +} +func rewriteValueAMD64_OpAMD64CVTSD2SS(v *Value) bool { + v_0 := v.Args[0] + // match: (CVTSD2SS (ROUNDSD [c] (CVTSS2SD x))) + // result: (ROUNDSS [c] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQi2f { + if v_0.Op != OpAMD64ROUNDSD { break } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVQstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + c := auxIntToInt8(v_0.AuxInt) + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64CVTSS2SD { + break + } + x := v_0_0.Args[0] + v.reset(OpAMD64ROUNDSS) + v.AuxInt = int8ToAuxInt(c) + v.AddArg(x) return true } - // match: (MOVSDstore [off] {sym} ptr (MOVSDconst [f]) mem) - // cond: f == f - // result: (MOVQstore [off] {sym} ptr (MOVQconst [int64(math.Float64bits(f))]) mem) + return false +} +func rewriteValueAMD64_OpAMD64DIVSD(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (DIVSD x l:(MOVSDload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (DIVSDload x [off] {sym} ptr mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVSDconst { + x := v_0 + l := v_1 + if 
l.Op != OpAMD64MOVSDload { break } - f := auxIntToFloat64(v_1.AuxInt) - mem := v_2 - if !(f == f) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } - v.reset(OpAMD64MOVQstore) + v.reset(OpAMD64DIVSDload) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(int64(math.Float64bits(f))) - v.AddArg3(ptr, v0, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool { +func rewriteValueAMD64_OpAMD64DIVSDload(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem) + // match: (DIVSDload [off1] {sym} val (ADDQconst [off2] base) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVSSload [off1+off2] {sym} ptr mem) + // result: (DIVSDload [off1+off2] {sym} val base mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVSSload) + v.reset(OpAMD64DIVSDload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(val, base, mem) return true } - // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // match: (DIVSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // result: (DIVSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + val := v_0 + if v_1.Op != 
OpAMD64LEAQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVSSload) + v.reset(OpAMD64DIVSDload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + v.AddArg3(val, base, mem) return true } - // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) - // result: (MOVLi2f val) + return false +} +func rewriteValueAMD64_OpAMD64DIVSS(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (DIVSS x l:(MOVSSload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (DIVSSload x [off] {sym} ptr mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVSSload { break } - val := v_1.Args[1] - if ptr != v_1.Args[0] { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } - v.reset(OpAMD64MOVLi2f) - v.AddArg(val) + v.reset(OpAMD64DIVSSload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64DIVSSload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // match: (DIVSSload [off1] {sym} val (ADDQconst [off2] base) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVSSstore [off1+off2] {sym} ptr val mem) + // result: (DIVSSload [off1+off2] {sym} val base mem) for { off1 
:= auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - val := v_1 + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVSSstore) + v.reset(OpAMD64DIVSSload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + v.AddArg3(val, base, mem) return true } - // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (DIVSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (DIVSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] mem := v_2 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVSSstore) + v.reset(OpAMD64DIVSSload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) - // result: (MOVLstore [off] {sym} ptr val mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLi2f { - break - } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + v.AddArg3(val, base, mem) return true } - // match: (MOVSSstore [off] {sym} ptr (MOVSSconst [f]) mem) - // cond: f == f - // result: (MOVLstore [off] 
{sym} ptr (MOVLconst [int32(math.Float32bits(f))]) mem) + return false +} +func rewriteValueAMD64_OpAMD64HMULL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (HMULL x y) + // cond: !x.rematerializeable() && y.rematerializeable() + // result: (HMULL y x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVSSconst { - break - } - f := auxIntToFloat32(v_1.AuxInt) - mem := v_2 - if !(f == f) { + x := v_0 + y := v_1 + if !(!x.rematerializeable() && y.rematerializeable()) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(int32(math.Float32bits(f))) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64HMULL) + v.AddArg2(y, x) return true } return false } -func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool { +func rewriteValueAMD64_OpAMD64HMULLU(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) + // match: (HMULLU x y) + // cond: !x.rematerializeable() && y.rematerializeable() + // result: (HMULLU y x) for { x := v_0 - if x.Op != OpAMD64MOVWload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + y := v_1 + if !(!x.rematerializeable() && y.rematerializeable()) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64HMULLU) + v.AddArg2(y, x) return true } - // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64HMULQ(v *Value) bool { + v_1 := v.Args[1] + v_0 
:= v.Args[0] + // match: (HMULQ x y) + // cond: !x.rematerializeable() && y.rematerializeable() + // result: (HMULQ y x) for { x := v_0 - if x.Op != OpAMD64MOVLload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + y := v_1 + if !(!x.rematerializeable() && y.rematerializeable()) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64HMULQ) + v.AddArg2(y, x) return true } - // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64HMULQU(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (HMULQU x y) + // cond: !x.rematerializeable() && y.rematerializeable() + // result: (HMULQU y x) for { x := v_0 - if x.Op != OpAMD64MOVQload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + y := v_1 + if !(!x.rematerializeable() && y.rematerializeable()) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64HMULQU) + v.AddArg2(y, x) return true } - // match: (MOVWQSX (ANDLconst [c] x)) - // cond: c & 0x8000 == 0 - // result: (ANDLconst [c & 0x7fff] x) + return false +} +func rewriteValueAMD64_OpAMD64KMOVBk(v *Value) bool { + v_0 := v.Args[0] + // match: (KMOVBk l:(MOVBload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (KMOVBload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64ANDLconst { + l := v_0 + if l.Op != OpAMD64MOVBload { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(c&0x8000 == 0) { + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(c & 0x7fff) - v.AddArg(x) + v.reset(OpAMD64KMOVBload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (MOVWQSX (MOVWQSX x)) - // result: (MOVWQSX x) + return false +} +func rewriteValueAMD64_OpAMD64KMOVDk(v *Value) bool { + v_0 := v.Args[0] + // match: (KMOVDk l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (KMOVDload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64MOVWQSX { + l := v_0 + if l.Op != OpAMD64MOVLload { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQSX) - v.AddArg(x) - return true - } - // match: (MOVWQSX (MOVBQSX x)) - // result: (MOVBQSX x) - for { - if v_0.Op != OpAMD64MOVBQSX { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQSX) - v.AddArg(x) + v.reset(OpAMD64KMOVDload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64KMOVQk(v *Value) bool { v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWQSX x) + // match: (KMOVQk l:(MOVQload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (KMOVQload [off] {sym} ptr mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVWstore { + l := v_0 + if l.Op != OpAMD64MOVQload { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := 
v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64MOVWQSX) - v.AddArg(x) + v.reset(OpAMD64KMOVQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) + return false +} +func rewriteValueAMD64_OpAMD64KMOVWk(v *Value) bool { + v_0 := v.Args[0] + // match: (KMOVWk l:(MOVWload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (KMOVWload [off] {sym} ptr mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + l := v_0 + if l.Op != OpAMD64MOVWload { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64MOVWQSXload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + v.reset(OpAMD64KMOVWload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (MOVWQSXload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVQconst [int64(int16(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))]) + return false +} +func rewriteValueAMD64_OpAMD64LEAL(v *Value) bool { + v_0 := v.Args[0] + // match: (LEAL [c] {s} (ADDLconst [d] x)) + // cond: is32Bit(int64(c)+int64(d)) + // result: (LEAL [c+d] {s} x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpSB || 
!(symIsRO(sym)) { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDLconst { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(int16(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))) + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(int64(c) + int64(d))) { + break + } + v.reset(OpAMD64LEAL) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg(x) return true } + // match: (LEAL [c] {s} (ADDL x y)) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAL1 [c] {s} x y) + for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + y := v_0_1 + if !(x.Op != OpSB && y.Op != OpSB) { + continue + } + v.reset(OpAMD64LEAL1) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true + } + break + } return false } -func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool { +func rewriteValueAMD64_OpAMD64LEAL1(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) + // match: (LEAL1 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAL1 [c+d] {s} x y) for { - x := v_0 - if x.Op != OpAMD64MOVWload { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64ADDLconst { + continue + } + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + continue + } + v.reset(OpAMD64LEAL1) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true + } + break + } + // match: (LEAL1 [c] {s} x z:(ADDL y y)) + // cond: x != z + // result: 
(LEAL2 [c] {s} x y) + for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + z := v_1 + if z.Op != OpAMD64ADDL { + continue + } + y := z.Args[1] + if y != z.Args[0] || !(x != z) { + continue + } + v.reset(OpAMD64LEAL2) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true + } + break + } + // match: (LEAL1 [c] {s} x (SHLLconst [2] y)) + // result: (LEAL4 [c] {s} x y) + for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 { + continue + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL4) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true + } + break + } + // match: (LEAL1 [c] {s} x (SHLLconst [3] y)) + // result: (LEAL8 [c] {s} x y) + for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64LEAL2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LEAL2 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAL2 [c+d] {s} x y) + for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDLconst { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - 
v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64LEAL2) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) + // match: (LEAL2 [c] {s} x (ADDLconst [d] y)) + // cond: is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB + // result: (LEAL2 [c+2*d] {s} x y) for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) x := v_0 - if x.Op != OpAMD64MOVLload { + if v_1.Op != OpAMD64ADDLconst { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + d := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64LEAL2) + v.AuxInt = int32ToAuxInt(c + 2*d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) + // match: (LEAL2 [c] {s} x z:(ADDL y y)) + // cond: x != z + // result: (LEAL4 [c] {s} x y) for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) x := v_0 - if x.Op != OpAMD64MOVQload { + z := v_1 + if z.Op != OpAMD64ADDL { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + y := z.Args[1] + if y != z.Args[0] || !(x != z) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64LEAL4) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWQZX (ANDLconst [c] x)) - 
// result: (ANDLconst [c & 0xffff] x) + // match: (LEAL2 [c] {s} x (SHLLconst [2] y)) + // result: (LEAL8 [c] {s} x y) for { - if v_0.Op != OpAMD64ANDLconst { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64SHLLconst || auxIntToInt8(v_1.AuxInt) != 2 { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(c & 0xffff) - v.AddArg(x) + y := v_1.Args[0] + v.reset(OpAMD64LEAL8) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWQZX (MOVWQZX x)) - // result: (MOVWQZX x) + // match: (LEAL2 [0] {s} (ADDL x x) x) + // cond: s == nil + // result: (SHLLconst [2] x) for { - if v_0.Op != OpAMD64MOVWQZX { + if auxIntToInt32(v.AuxInt) != 0 { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQZX) - v.AddArg(x) - return true - } - // match: (MOVWQZX (MOVBQZX x)) - // result: (MOVBQZX x) - for { - if v_0.Op != OpAMD64MOVBQZX { + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDL { break } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQZX) + x := v_0.Args[1] + if x != v_0.Args[0] || x != v_1 || !(s == nil) { + break + } + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(2) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool { +func rewriteValueAMD64_OpAMD64LEAL4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWQZX x) + // match: (LEAL4 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAL4 [c+d] {s} x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVWstore { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDLconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := 
auxToSym(v_1.Aux) - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { break } - v.reset(OpAMD64MOVWQZX) - v.AddArg(x) + v.reset(OpAMD64LEAL4) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVWload [off1+off2] {sym} ptr mem) + // match: (LEAL4 [c] {s} x (ADDLconst [d] y)) + // cond: is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB + // result: (LEAL4 [c+4*d] {s} x y) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64ADDLconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + d := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB) { break } - v.reset(OpAMD64MOVWload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64LEAL4) + v.AuxInt = int32ToAuxInt(c + 4*d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (LEAL4 [c] {s} x z:(ADDL y y)) + // cond: x != z + // result: (LEAL8 [c] {s} x y) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + 
z := v_1 + if z.Op != OpAMD64ADDL { break } - v.reset(OpAMD64MOVWload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) - return true - } - // match: (MOVWload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpSB || !(symIsRO(sym)) { + y := z.Args[1] + if y != z.Args[0] || !(x != z) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))) + v.reset(OpAMD64LEAL8) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } return false } -func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64LEAL8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) + // match: (LEAL8 [c] {s} (ADDLconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAL8 [c+d] {s} x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVWQSX { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDLconst { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVWQZX { + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + 
v.reset(OpAMD64LEAL8) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MOVWstore [off1+off2] {sym} ptr val mem) + // match: (LEAL8 [c] {s} x (ADDLconst [d] y)) + // cond: is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB + // result: (LEAL8 [c+8*d] {s} x y) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64ADDLconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + d := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + v.reset(OpAMD64LEAL8) + v.AuxInt = int32ToAuxInt(c + 8*d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) - // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64LEAQ(v *Value) bool { + v_0 := v.Args[0] + // match: (LEAQ [c] {s} (ADDQconst [d] x)) + // cond: is32Bit(int64(c)+int64(d)) + // result: (LEAQ [c+d] {s} x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - c := auxIntToInt32(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(int64(c) + int64(d))) { + break + } + v.reset(OpAMD64LEAQ) + v.AuxInt = 
int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg(x) return true } - // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem) - // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem) + // match: (LEAQ [c] {s} (ADDQ x y)) + // cond: x.Op != OpSB && y.Op != OpSB + // result: (LEAQ1 [c] {s} x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQ { break } - c := auxIntToInt64(v_1.AuxInt) - mem := v_2 - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + y := v_0_1 + if !(x.Op != OpSB && y.Op != OpSB) { + continue + } + v.reset(OpAMD64LEAQ1) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true + } + break } - // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -18912,797 +23192,1063 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { } off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 + x := v_0.Args[0] if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstore) + v.reset(OpAMD64LEAQ) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg(x) return true } - // match: (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem) - // cond: 
x.Uses == 1 && buildcfg.GOAMD64 >= 3 - // result: (MOVBEWstore [i] {s} p w mem) + // match: (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - i := auxIntToInt32(v.AuxInt) - s := auxToSym(v.Aux) - p := v_0 - x := v_1 - if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ1 { break } - w := x.Args[0] - mem := v_2 - if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + y := v_0.Args[1] + x := v_0.Args[0] + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVBEWstore) - v.AuxInt = int32ToAuxInt(i) - v.Aux = symToAux(s) - v.AddArg3(p, w, mem) + v.reset(OpAMD64LEAQ1) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } - // match: (MOVWstore [off] {sym} ptr (KMOVWi mask) mem) - // result: (KMOVWstore [off] {sym} ptr mask mem) + // match: (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64KMOVWi { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ2 { break } - mask := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64KMOVWstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + y := v_0.Args[1] + x := v_0.Args[0] + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64LEAQ2) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } - return false 
-} -func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd32(off) - // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) + // match: (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - sc := auxIntToValAndOff(v.AuxInt) - s := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ4 { break } - off := auxIntToInt32(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 - if !(ValAndOff(sc).canAdd32(off)) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + y := v_0.Args[1] + x := v_0.Args[0] + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) - v.Aux = symToAux(s) - v.AddArg2(ptr, mem) + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } - // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) - // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - sc := auxIntToValAndOff(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64LEAQ8 { break } - off := auxIntToInt32(v_0.AuxInt) + off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && 
ValAndOff(sc).canAdd32(off)) { + y := v_0.Args[1] + x := v_0.Args[0] + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(ptr, mem) + v.AddArg2(x, y) return true } return false } -func rewriteValueAMD64_OpAMD64MULL(v *Value) bool { +func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MULL x (MOVLconst [c])) - // result: (MULLconst [c] x) + // match: (LEAQ1 [c] {s} (ADDQconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAQ1 [c+d] {s} x y) for { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64ADDQconst { continue } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64MULLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + continue + } + v.reset(OpAMD64LEAQ1) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MULLconst [c] (MULLconst [d] x)) - // result: (MULLconst [c * d] x) + // match: (LEAQ1 [c] {s} x z:(ADDQ y y)) + // cond: x != z + // result: (LEAQ2 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MULLconst { - break - } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64MULLconst) - v.AuxInt = int32ToAuxInt(c * d) - v.AddArg(x) - return true - } - // match: (MULLconst [ 0] _) - // result: (MOVLconst [0]) - for { - if auxIntToInt32(v.AuxInt) != 0 { - break + s := auxToSym(v.Aux) + 
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + z := v_1 + if z.Op != OpAMD64ADDQ { + continue + } + y := z.Args[1] + if y != z.Args[0] || !(x != z) { + continue + } + v.reset(OpAMD64LEAQ2) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true + break } - // match: (MULLconst [ 1] x) - // result: x + // match: (LEAQ1 [c] {s} x (SHLQconst [2] y)) + // result: (LEAQ4 [c] {s} x y) for { - if auxIntToInt32(v.AuxInt) != 1 { - break + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 { + continue + } + y := v_1.Args[0] + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true } - x := v_0 - v.copyOf(x) - return true + break } - // match: (MULLconst [c] x) - // cond: v.Type.Size() <= 4 && canMulStrengthReduce32(config, c) - // result: {mulStrengthReduce32(v, x, c)} + // match: (LEAQ1 [c] {s} x (SHLQconst [3] y)) + // result: (LEAQ8 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(v.Type.Size() <= 4 && canMulStrengthReduce32(config, c)) { - break + s := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[0] + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) + return true } - v.copyOf(mulStrengthReduce32(v, x, c)) - return true + break } - // match: (MULLconst [c] (MOVLconst [d])) - // result: (MOVLconst [c*d]) + // match: (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB + // result: (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != 
OpAMD64MOVLconst { - break + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64LEAQ { + continue + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { + continue + } + v.reset(OpAMD64LEAQ1) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) + return true } - d := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(c * d) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MULQ x (MOVQconst [c])) - // cond: is32Bit(c) - // result: (MULQconst [int32(c)] x) + // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y) for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_1.Op != OpAMD64LEAQ1 { continue } - c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c)) { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { continue } - v.reset(OpAMD64MULQconst) - v.AuxInt = int32ToAuxInt(int32(c)) - v.AddArg(x) + v.reset(OpAMD64LEAQ2) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } break } + // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + for _i0 := 0; _i0 <= 1; 
_i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { + continue + } + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if x != v_1_0 { + continue + } + y := v_1_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + continue + } + v.reset(OpAMD64LEAQ2) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(y, x) + return true + } + } + break + } + // match: (LEAQ1 [0] x y) + // cond: v.Aux == nil + // result: (ADDQ x y) + for { + if auxIntToInt32(v.AuxInt) != 0 { + break + } + x := v_0 + y := v_1 + if !(v.Aux == nil) { + break + } + v.reset(OpAMD64ADDQ) + v.AddArg2(x, y) + return true + } return false } -func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MULQconst [c] (MULQconst [d] x)) - // cond: is32Bit(int64(c)*int64(d)) - // result: (MULQconst [c * d] x) + // match: (LEAQ2 [c] {s} (ADDQconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAQ2 [c+d] {s} x y) for { c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MULQconst { + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } d := auxIntToInt32(v_0.AuxInt) x := v_0.Args[0] - if !(is32Bit(int64(c) * int64(d))) { + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { break } - v.reset(OpAMD64MULQconst) - v.AuxInt = int32ToAuxInt(c * d) - v.AddArg(x) + v.reset(OpAMD64LEAQ2) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MULQconst [ 0] _) - // result: (MOVQconst [0]) + // match: (LEAQ2 [c] {s} x (ADDQconst [d] y)) + // cond: is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB + // result: (LEAQ2 [c+2*d] {s} x y) for { - if auxIntToInt32(v.AuxInt) 
!= 0 { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(0) - return true - } - // match: (MULQconst [ 1] x) - // result: x - for { - if auxIntToInt32(v.AuxInt) != 1 { + d := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB) { break } - x := v_0 - v.copyOf(x) + v.reset(OpAMD64LEAQ2) + v.AuxInt = int32ToAuxInt(c + 2*d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MULQconst [c] x) - // cond: canMulStrengthReduce(config, int64(c)) - // result: {mulStrengthReduce(v, x, int64(c))} + // match: (LEAQ2 [c] {s} x z:(ADDQ y y)) + // cond: x != z + // result: (LEAQ4 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) x := v_0 - if !(canMulStrengthReduce(config, int64(c))) { + z := v_1 + if z.Op != OpAMD64ADDQ { break } - v.copyOf(mulStrengthReduce(v, x, int64(c))) - return true - } - // match: (MULQconst [c] (MOVQconst [d])) - // result: (MOVQconst [int64(c)*d]) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + y := z.Args[1] + if y != z.Args[0] || !(x != z) { break } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(c) * d) + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MULQconst [c] (NEGQ x)) - // cond: c != -(1<<31) - // result: (MULQconst [-c] x) + // match: (LEAQ2 [c] {s} x (SHLQconst [2] y)) + // result: (LEAQ8 [c] {s} x y) for { c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64NEGQ { - break - } - x := v_0.Args[0] - if !(c != -(1 << 31)) { + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64SHLQconst || auxIntToInt8(v_1.AuxInt) != 2 { break } - v.reset(OpAMD64MULQconst) - v.AuxInt = int32ToAuxInt(-c) - v.AddArg(x) + y := v_1.Args[0] + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) 
return true } - return false -} -func rewriteValueAMD64_OpAMD64MULSD(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (MULSDload x [off] {sym} ptr mem) + // match: (LEAQ2 [0] {s} (ADDQ x x) x) + // cond: s == nil + // result: (SHLQconst [2] x) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVSDload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(OpAMD64MULSDload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if auxIntToInt32(v.AuxInt) != 0 { + break } - break + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQ { + break + } + x := v_0.Args[1] + if x != v_0.Args[0] || x != v_1 || !(s == nil) { + break + } + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true } - return false -} -func rewriteValueAMD64_OpAMD64MULSDload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MULSDload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MULSDload [off1+off2] {sym} val base mem) + // match: (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB + // result: (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) for { off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + x := 
v_0.Args[0] + y := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { break } - v.reset(OpAMD64MULSDload) + v.reset(OpAMD64LEAQ2) v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } - // match: (MULSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MULSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) + // cond: is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil + // result: (LEAQ4 [off1+2*off2] {sym1} x y) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { break } off2 := auxIntToInt32(v_1.AuxInt) sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil) { break } - v.reset(OpAMD64MULSDload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(off1 + 2*off2) + v.Aux = symToAux(sym1) + v.AddArg2(x, y) return true } - // match: (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) - // result: (MULSD x (MOVQi2f y)) + // match: (LEAQ2 [off] {sym} x (MOVQconst [scale])) + // cond: is32Bit(int64(off)+int64(scale)*2) + // result: (LEAQ [off+int32(scale)*2] {sym} x) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVQstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { + if v_1.Op != OpAMD64MOVQconst { break } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + scale := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(int64(off) + 
int64(scale)*2)) { break } - v.reset(OpAMD64MULSD) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQi2f, typ.Float64) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64LEAQ) + v.AuxInt = int32ToAuxInt(off + int32(scale)*2) + v.Aux = symToAux(sym) + v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64MULSS(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (MULSSload x [off] {sym} ptr mem) + // match: (LEAQ2 [off] {sym} x (MOVLconst [scale])) + // cond: is32Bit(int64(off)+int64(scale)*2) + // result: (LEAQ [off+int32(scale)*2] {sym} x) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVSSload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(OpAMD64MULSSload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break } - break + scale := auxIntToInt32(v_1.AuxInt) + if !(is32Bit(int64(off) + int64(scale)*2)) { + break + } + v.reset(OpAMD64LEAQ) + v.AuxInt = int32ToAuxInt(off + int32(scale)*2) + v.Aux = symToAux(sym) + v.AddArg(x) + return true } return false } -func rewriteValueAMD64_OpAMD64MULSSload(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MULSSload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (MULSSload [off1+off2] {sym} val base mem) + // match: (LEAQ4 [c] {s} (ADDQconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAQ4 [c+d] {s} x y) for { - off1 := 
auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { break } - v.reset(OpAMD64MULSSload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MULSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (MULSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (LEAQ4 [c] {s} x (ADDQconst [d] y)) + // cond: is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB + // result: (LEAQ4 [c+4*d] {s} x y) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + d := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB) { break } - v.reset(OpAMD64MULSSload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(c + 4*d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) - // result: (MULSS x (MOVLi2f y)) + // match: (LEAQ4 [c] {s} x z:(ADDQ y y)) + // cond: x != z + // result: (LEAQ8 [c] {s} x y) for { - off := 
auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVLstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { + z := v_1 + if z.Op != OpAMD64ADDQ { break } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + y := z.Args[1] + if y != z.Args[0] || !(x != z) { break } - v.reset(OpAMD64MULSS) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLi2f, typ.Float32) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(c) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - return false -} -func rewriteValueAMD64_OpAMD64NEGL(v *Value) bool { - v_0 := v.Args[0] - // match: (NEGL (NEGL x)) - // result: x + // match: (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB + // result: (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - if v_0.Op != OpAMD64NEGL { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) x := v_0.Args[0] - v.copyOf(x) + y := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAQ4) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } - // match: (NEGL s:(SUBL x y)) - // cond: s.Uses == 1 - // result: (SUBL y x) + // match: (LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) + // cond: is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil + // result: (LEAQ8 [off1+4*off2] {sym1} x y) for { - s := v_0 - if s.Op != OpAMD64SUBL { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { break } - y := s.Args[1] - x := s.Args[0] - if !(s.Uses == 1) { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + y := v_1.Args[1] + if y != v_1.Args[0] || 
!(is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil) { break } - v.reset(OpAMD64SUBL) - v.AddArg2(y, x) + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(off1 + 4*off2) + v.Aux = symToAux(sym1) + v.AddArg2(x, y) return true } - // match: (NEGL (MOVLconst [c])) - // result: (MOVLconst [-c]) + // match: (LEAQ4 [off] {sym} x (MOVQconst [scale])) + // cond: is32Bit(int64(off)+int64(scale)*4) + // result: (LEAQ [off+int32(scale)*4] {sym} x) for { - if v_0.Op != OpAMD64MOVLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - c := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(-c) + scale := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(int64(off) + int64(scale)*4)) { + break + } + v.reset(OpAMD64LEAQ) + v.AuxInt = int32ToAuxInt(off + int32(scale)*4) + v.Aux = symToAux(sym) + v.AddArg(x) + return true + } + // match: (LEAQ4 [off] {sym} x (MOVLconst [scale])) + // cond: is32Bit(int64(off)+int64(scale)*4) + // result: (LEAQ [off+int32(scale)*4] {sym} x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break + } + scale := auxIntToInt32(v_1.AuxInt) + if !(is32Bit(int64(off) + int64(scale)*4)) { + break + } + v.reset(OpAMD64LEAQ) + v.AuxInt = int32ToAuxInt(off + int32(scale)*4) + v.Aux = symToAux(sym) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64NEGQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64LEAQ8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NEGQ (NEGQ x)) - // result: x + // match: (LEAQ8 [c] {s} (ADDQconst [d] x) y) + // cond: is32Bit(int64(c)+int64(d)) && x.Op != OpSB + // result: (LEAQ8 [c+d] {s} x y) for { - if v_0.Op != OpAMD64NEGQ { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } + d := auxIntToInt32(v_0.AuxInt) x := v_0.Args[0] - v.copyOf(x) + y := v_1 + if !(is32Bit(int64(c)+int64(d)) && x.Op != OpSB) { + 
break + } + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(c + d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (NEGQ s:(SUBQ x y)) - // cond: s.Uses == 1 - // result: (SUBQ y x) + // match: (LEAQ8 [c] {s} x (ADDQconst [d] y)) + // cond: is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB + // result: (LEAQ8 [c+8*d] {s} x y) for { - s := v_0 - if s.Op != OpAMD64SUBQ { + c := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - y := s.Args[1] - x := s.Args[0] - if !(s.Uses == 1) { + d := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB) { break } - v.reset(OpAMD64SUBQ) - v.AddArg2(y, x) + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(c + 8*d) + v.Aux = symToAux(s) + v.AddArg2(x, y) return true } - // match: (NEGQ (MOVQconst [c])) - // result: (MOVQconst [-c]) + // match: (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB + // result: (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) for { - if v_0.Op != OpAMD64MOVQconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(-c) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + x := v_0.Args[0] + y := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB) { + break + } + v.reset(OpAMD64LEAQ8) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(x, y) return true } - // match: (NEGQ (ADDQconst [c] (NEGQ x))) - // cond: c != -(1<<31) - // result: (ADDQconst [-c] x) + // match: (LEAQ8 [off] {sym} x (MOVQconst [scale])) + // cond: is32Bit(int64(off)+int64(scale)*8) + // result: (LEAQ [off+int32(scale)*8] {sym} x) for { - if v_0.Op != OpAMD64ADDQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x 
:= v_0 + if v_1.Op != OpAMD64MOVQconst { break } - c := auxIntToInt32(v_0.AuxInt) - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64NEGQ { + scale := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(int64(off) + int64(scale)*8)) { break } - x := v_0_0.Args[0] - if !(c != -(1 << 31)) { + v.reset(OpAMD64LEAQ) + v.AuxInt = int32ToAuxInt(off + int32(scale)*8) + v.Aux = symToAux(sym) + v.AddArg(x) + return true + } + // match: (LEAQ8 [off] {sym} x (MOVLconst [scale])) + // cond: is32Bit(int64(off)+int64(scale)*8) + // result: (LEAQ [off+int32(scale)*8] {sym} x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(-c) + scale := auxIntToInt32(v_1.AuxInt) + if !(is32Bit(int64(off) + int64(scale)*8)) { + break + } + v.reset(OpAMD64LEAQ) + v.AuxInt = int32ToAuxInt(off + int32(scale)*8) + v.Aux = symToAux(sym) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64NOTL(v *Value) bool { +func rewriteValueAMD64_OpAMD64LoweredPanicBoundsCR(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NOTL (MOVLconst [c])) - // result: (MOVLconst [^c]) + // match: (LoweredPanicBoundsCR [kind] {p} (MOVQconst [c]) mem) + // result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem) for { - if v_0.Op != OpAMD64MOVLconst { + kind := auxIntToInt64(v.AuxInt) + p := auxToPanicBoundsC(v.Aux) + if v_0.Op != OpAMD64MOVQconst { break } - c := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(^c) + c := auxIntToInt64(v_0.AuxInt) + mem := v_1 + v.reset(OpAMD64LoweredPanicBoundsCC) + v.AuxInt = int64ToAuxInt(kind) + v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c}) + v.AddArg(mem) return true } return false } -func rewriteValueAMD64_OpAMD64NOTQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64LoweredPanicBoundsRC(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NOTQ (MOVQconst [c])) - // result: 
(MOVQconst [^c]) + // match: (LoweredPanicBoundsRC [kind] {p} (MOVQconst [c]) mem) + // result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem) for { + kind := auxIntToInt64(v.AuxInt) + p := auxToPanicBoundsC(v.Aux) if v_0.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(^c) + mem := v_1 + v.reset(OpAMD64LoweredPanicBoundsCC) + v.AuxInt = int64ToAuxInt(kind) + v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C}) + v.AddArg(mem) return true } return false } -func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { +func rewriteValueAMD64_OpAMD64LoweredPanicBoundsRR(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ORL (SHLL (MOVLconst [1]) y) x) - // result: (BTSL x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLL { - continue - } - y := v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { - continue - } - x := v_1 - v.reset(OpAMD64BTSL) - v.AddArg2(x, y) - return true + // match: (LoweredPanicBoundsRR [kind] x (MOVQconst [c]) mem) + // result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem) + for { + kind := auxIntToInt64(v.AuxInt) + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break } - break + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64LoweredPanicBoundsRC) + v.AuxInt = int64ToAuxInt(kind) + v.Aux = panicBoundsCToAux(PanicBoundsC{C: c}) + v.AddArg2(x, mem) + return true } - // match: (ORL x (MOVLconst [c])) - // result: (ORLconst [c] x) + // match: (LoweredPanicBoundsRR [kind] (MOVQconst [c]) y mem) + // result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ORLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true + kind := 
auxIntToInt64(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { + break } - break + c := auxIntToInt64(v_0.AuxInt) + y := v_1 + mem := v_2 + v.reset(OpAMD64LoweredPanicBoundsCR) + v.AuxInt = int64ToAuxInt(kind) + v.Aux = panicBoundsCToAux(PanicBoundsC{C: c}) + v.AddArg2(y, mem) + return true } - // match: (ORL x x) - // result: x + return false +} +func rewriteValueAMD64_OpAMD64MOVBELstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVBELstore [i] {s} p x:(BSWAPL w) mem) + // cond: x.Uses == 1 + // result: (MOVLstore [i] {s} p w mem) for { - x := v_0 - if x != v_1 { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64BSWAPL { break } - v.copyOf(x) + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) return true } - // match: (ORL x l:(MOVLload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (ORLload x [off] {sym} ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVBEQstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVBEQstore [i] {s} p x:(BSWAPQ w) mem) + // cond: x.Uses == 1 + // result: (MOVQstore [i] {s} p w mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVLload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(OpAMD64ORLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64BSWAPQ { + break } - break + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = 
symToAux(s) + v.AddArg3(p, w, mem) + return true } return false } -func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVBEWstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ORLconst [c] (ORLconst [d] x)) - // result: (ORLconst [c | d] x) + // match: (MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) + // cond: x.Uses == 1 + // result: (MOVWstore [i] {s} p w mem) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64ORLconst { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64ORLconst) - v.AuxInt = int32ToAuxInt(c | d) - v.AddArg(x) + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) return true } - // match: (ORLconst [0] x) - // result: x + return false +} +func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVBQSX x:(MOVBload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) for { - if auxIntToInt32(v.AuxInt) != 0 { + x := v_0 + if x.Op != OpAMD64MOVBload { + break + } + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVBQSX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) + for { x := v_0 - v.copyOf(x) + if x.Op != OpAMD64MOVWload { + break + } + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := 
x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (ORLconst [-1] _) - // result: (MOVLconst [-1]) + // match: (MOVBQSX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) for { - if auxIntToInt32(v.AuxInt) != -1 { + x := v_0 + if x.Op != OpAMD64MOVLload { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(-1) + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (ORLconst [c] (MOVLconst [d])) - // result: (MOVLconst [c|d]) + // match: (MOVBQSX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBQSXload [off] {sym} ptr mem) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + x := v_0 + if x.Op != OpAMD64MOVQload { break } - d := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(c | d) + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVBQSX (ANDLconst [c] x)) + // cond: c & 0x80 == 0 + // result: (ANDLconst [c & 0x7f] x) + for { + if v_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(c&0x80 == 0) { + break + } + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(c & 0x7f) + 
v.AddArg(x) + return true + } + // match: (MOVBQSX (MOVBQSX x)) + // result: (MOVBQSX x) + for { + if v_0.Op != OpAMD64MOVBQSX { + break + } + x := v_0.Args[0] + v.reset(OpAMD64MOVBQSX) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64ORLconstmodify(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVBQSXload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) + // match: (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVBQSX x) for { - valoff1 := auxIntToValAndOff(v.AuxInt) + off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + if v_1.Op != OpAMD64MOVBstore { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2)) { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpAMD64ORLconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(sym) - v.AddArg2(base, mem) + v.reset(OpAMD64MOVBQSX) + v.AddArg(x) return true } - // match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) - // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + // match: (MOVBQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVBQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - valoff1 := auxIntToValAndOff(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) 
if v_0.Op != OpAMD64LEAQ { break @@ -19711,97 +24257,157 @@ func rewriteValueAMD64_OpAMD64ORLconstmodify(v *Value) bool { sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64ORLconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.reset(OpAMD64MOVBQSXload) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg2(base, mem) return true } + // match: (MOVBQSXload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVQconst [int64(int8(read8(sym, int64(off))))]) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym)) { + break + } + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(int8(read8(sym, int64(off))))) + return true + } return false } -func rewriteValueAMD64_OpAMD64ORLload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value) bool { v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (ORLload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (ORLload [off1+off2] {sym} val base mem) + // match: (MOVBQZX x:(MOVBload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBload [off] {sym} ptr mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + x := v_0 + if x.Op != OpAMD64MOVBload { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64ORLload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, 
base, mem) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (ORLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (MOVBQZX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBload [off] {sym} ptr mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + x := v_0 + if x.Op != OpAMD64MOVWload { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64ORLload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) - // result: ( ORL x (MOVLf2i y)) + // match: (MOVBQZX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBload [off] {sym} ptr mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { + if x.Op != OpAMD64MOVLload { break } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - 
v.reset(OpAMD64ORL) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32) - v0.AddArg(y) - v.AddArg2(x, v0) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVBQZX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVBload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != OpAMD64MOVQload { + break + } + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVBload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVBQZX (ANDLconst [c] x)) + // result: (ANDLconst [c & 0xff] x) + for { + if v_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(c & 0xff) + v.AddArg(x) + return true + } + // match: (MOVBQZX (MOVBQZX x)) + // result: (MOVBQZX x) + for { + if v_0.Op != OpAMD64MOVBQZX { + break + } + x := v_0.Args[0] + v.reset(OpAMD64MOVBQZX) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // match: (MOVBatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (ORLmodify [off1+off2] {sym} base val mem) + // result: (MOVBatomicload [off1+off2] {sym} ptr mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -19809,21 +24415,20 @@ func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := 
v_2 + ptr := v_0.Args[0] + mem := v_1 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64ORLmodify) + v.reset(OpAMD64MOVBatomicload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVBatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVBatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -19832,328 +24437,69 @@ func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool { } off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 + ptr := v_0.Args[0] + mem := v_1 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64ORLmodify) + v.reset(OpAMD64MOVBatomicload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ORQ (SHLQ (MOVQconst [1]) y) x) - // result: (BTSQ x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLQ { - continue - } - y := v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 { - continue - } - x := v_1 - v.reset(OpAMD64BTSQ) - v.AddArg2(x, y) - return true - } - break - } - // match: (ORQ (MOVQconst [c]) x) - // cond: isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31 - // result: (BTSQconst [int8(log64u(uint64(c)))] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVQconst { - 
continue - } - c := auxIntToInt64(v_0.AuxInt) - x := v_1 - if !(isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31) { - continue - } - v.reset(OpAMD64BTSQconst) - v.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v.AddArg(x) - return true - } - break - } - // match: (ORQ x (MOVQconst [c])) - // cond: is32Bit(c) - // result: (ORQconst [int32(c)] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c)) { - continue - } - v.reset(OpAMD64ORQconst) - v.AuxInt = int32ToAuxInt(int32(c)) - v.AddArg(x) - return true - } - break - } - // match: (ORQ x (MOVLconst [c])) - // result: (ORQconst [c] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ORQconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true - } - break - } - // match: (ORQ (SHRQ lo bits) (SHLQ hi (NEGQ bits))) - // result: (SHRDQ lo hi bits) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHRQ { - continue - } - bits := v_0.Args[1] - lo := v_0.Args[0] - if v_1.Op != OpAMD64SHLQ { - continue - } - _ = v_1.Args[1] - hi := v_1.Args[0] - v_1_1 := v_1.Args[1] - if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { - continue - } - v.reset(OpAMD64SHRDQ) - v.AddArg3(lo, hi, bits) - return true - } - break - } - // match: (ORQ (SHLQ lo bits) (SHRQ hi (NEGQ bits))) - // result: (SHLDQ lo hi bits) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLQ { - continue - } - bits := v_0.Args[1] - lo := v_0.Args[0] - if v_1.Op != OpAMD64SHRQ { - continue - } - _ = v_1.Args[1] - hi := v_1.Args[0] - v_1_1 := v_1.Args[1] - if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { - continue - } - v.reset(OpAMD64SHLDQ) - v.AddArg3(lo, hi, bits) - return true - } - break - } - // match: (ORQ (SHRXQ lo 
bits) (SHLXQ hi (NEGQ bits))) - // result: (SHRDQ lo hi bits) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHRXQ { - continue - } - bits := v_0.Args[1] - lo := v_0.Args[0] - if v_1.Op != OpAMD64SHLXQ { - continue - } - _ = v_1.Args[1] - hi := v_1.Args[0] - v_1_1 := v_1.Args[1] - if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { - continue - } - v.reset(OpAMD64SHRDQ) - v.AddArg3(lo, hi, bits) - return true - } - break - } - // match: (ORQ (SHLXQ lo bits) (SHRXQ hi (NEGQ bits))) - // result: (SHLDQ lo hi bits) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLXQ { - continue - } - bits := v_0.Args[1] - lo := v_0.Args[0] - if v_1.Op != OpAMD64SHRXQ { - continue - } - _ = v_1.Args[1] - hi := v_1.Args[0] - v_1_1 := v_1.Args[1] - if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { - continue - } - v.reset(OpAMD64SHLDQ) - v.AddArg3(lo, hi, bits) - return true - } - break - } - // match: (ORQ (MOVQconst [c]) (MOVQconst [d])) - // result: (MOVQconst [c|d]) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVQconst { - continue - } - c := auxIntToInt64(v_0.AuxInt) - if v_1.Op != OpAMD64MOVQconst { - continue - } - d := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(c | d) - return true - } - break - } - // match: (ORQ x x) - // result: x + // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVBQZX x) for { - x := v_0 - if x != v_1 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVBstore { break } - v.copyOf(x) - return true - } - // match: (ORQ x l:(MOVQload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (ORQload x [off] {sym} ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - 
if l.Op != OpAMD64MOVQload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(OpAMD64ORQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true - } - break - } - return false -} -func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool { - v_0 := v.Args[0] - // match: (ORQconst [c] (ORQconst [d] x)) - // result: (ORQconst [c | d] x) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64ORQconst { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64ORQconst) - v.AuxInt = int32ToAuxInt(c | d) + v.reset(OpAMD64MOVBQZX) v.AddArg(x) return true } - // match: (ORQconst [0] x) - // result: x - for { - if auxIntToInt32(v.AuxInt) != 0 { - break - } - x := v_0 - v.copyOf(x) - return true - } - // match: (ORQconst [-1] _) - // result: (MOVQconst [-1]) - for { - if auxIntToInt32(v.AuxInt) != -1 { - break - } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(-1) - return true - } - // match: (ORQconst [c] (MOVQconst [d])) - // result: (MOVQconst [int64(c)|d]) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { - break - } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(c) | d) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ORQconstmodify(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (ORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) + // match: (MOVBload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: 
(MOVBload [off1+off2] {sym} ptr mem) for { - valoff1 := auxIntToValAndOff(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) if v_0.Op != OpAMD64ADDQconst { break } off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] + ptr := v_0.Args[0] mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2)) { + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64ORQconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.reset(OpAMD64MOVBload) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg2(base, mem) + v.AddArg2(ptr, mem) return true } - // match: (ORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) - // result: (ORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + // match: (MOVBload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - valoff1 := auxIntToValAndOff(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) if v_0.Op != OpAMD64LEAQ { break @@ -20162,1759 +24508,1808 @@ func rewriteValueAMD64_OpAMD64ORQconstmodify(v *Value) bool { sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64ORQconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.reset(OpAMD64MOVBload) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg2(base, mem) return true } + // match: (MOVBload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int32(read8(sym, int64(off)))]) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym)) { + break + } + 
v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(int32(read8(sym, int64(off)))) + return true + } return false } -func rewriteValueAMD64_OpAMD64ORQload(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (ORQload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (ORQload [off1+off2] {sym} val base mem) + // match: (MOVBstore [off] {sym} ptr y:(SETL x) mem) + // cond: y.Uses == 1 + // result: (SETLstore [off] {sym} ptr x mem) for { - off1 := auxIntToInt32(v.AuxInt) + off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETL { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] + x := y.Args[0] mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + if !(y.Uses == 1) { break } - v.reset(OpAMD64ORQload) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64SETLstore) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (ORQload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (ORQload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (MOVBstore [off] {sym} ptr y:(SETLE x) mem) + // cond: y.Uses == 1 + // result: (SETLEstore [off] {sym} ptr x mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETLE { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] + x := y.Args[0] mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + if !(y.Uses == 1) { break } - v.reset(OpAMD64ORQload) 
- v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.reset(OpAMD64SETLEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: ( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) - // result: ( ORQ x (MOVQf2i y)) + // match: (MOVBstore [off] {sym} ptr y:(SETG x) mem) + // cond: y.Uses == 1 + // result: (SETGstore [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVSDstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETG { break } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + x := y.Args[0] + mem := v_2 + if !(y.Uses == 1) { break } - v.reset(OpAMD64ORQ) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQf2i, typ.UInt64) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64SETGstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64ORQmodify(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (ORQmodify [off1+off2] {sym} base val mem) + // match: (MOVBstore [off] {sym} ptr y:(SETGE x) mem) + // cond: y.Uses == 1 + // result: (SETGEstore [off] {sym} ptr x mem) for { - off1 := auxIntToInt32(v.AuxInt) + off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETGE { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 + x := y.Args[0] mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + if !(y.Uses == 1) { break } - v.reset(OpAMD64ORQmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64SETGEstore) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - 
v.AddArg3(base, val, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (ORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (ORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (MOVBstore [off] {sym} ptr y:(SETEQ x) mem) + // cond: y.Uses == 1 + // result: (SETEQstore [off] {sym} ptr x mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETEQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 + x := y.Args[0] mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + if !(y.Uses == 1) { break } - v.reset(OpAMD64ORQmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.reset(OpAMD64SETEQstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64ROLB(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (ROLB x (NEGQ y)) - // result: (RORB x y) + // match: (MOVBstore [off] {sym} ptr y:(SETNE x) mem) + // cond: y.Uses == 1 + // result: (SETNEstore [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETNE { break } - y := v_1.Args[0] - v.reset(OpAMD64RORB) - v.AddArg2(x, y) - return true - } - // match: (ROLB x (NEGL y)) - // result: (RORB x y) - for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + x := y.Args[0] + mem := v_2 + if !(y.Uses == 1) { break } - y := v_1.Args[0] - v.reset(OpAMD64RORB) - v.AddArg2(x, y) + v.reset(OpAMD64SETNEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: 
(ROLB x (MOVQconst [c])) - // result: (ROLBconst [int8(c&7) ] x) + // match: (MOVBstore [off] {sym} ptr y:(SETB x) mem) + // cond: y.Uses == 1 + // result: (SETBstore [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETB { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLBconst) - v.AuxInt = int8ToAuxInt(int8(c & 7)) - v.AddArg(x) - return true - } - // match: (ROLB x (MOVLconst [c])) - // result: (ROLBconst [int8(c&7) ] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + x := y.Args[0] + mem := v_2 + if !(y.Uses == 1) { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLBconst) - v.AuxInt = int8ToAuxInt(int8(c & 7)) - v.AddArg(x) + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64ROLBconst(v *Value) bool { - v_0 := v.Args[0] - // match: (ROLBconst x [0]) - // result: x + // match: (MOVBstore [off] {sym} ptr y:(SETBE x) mem) + // cond: y.Uses == 1 + // result: (SETBEstore [off] {sym} ptr x mem) for { - if auxIntToInt8(v.AuxInt) != 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETBE { break } - x := v_0 - v.copyOf(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64ROLL(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (ROLL x (NEGQ y)) - // result: (RORL x y) - for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + x := y.Args[0] + mem := v_2 + if !(y.Uses == 1) { break } - y := v_1.Args[0] - v.reset(OpAMD64RORL) - v.AddArg2(x, y) + v.reset(OpAMD64SETBEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (ROLL x (NEGL y)) - // result: (RORL x y) + // match: (MOVBstore [off] {sym} ptr y:(SETA x) mem) + // cond: y.Uses == 1 + // result: (SETAstore 
[off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETA { break } - y := v_1.Args[0] - v.reset(OpAMD64RORL) - v.AddArg2(x, y) + x := y.Args[0] + mem := v_2 + if !(y.Uses == 1) { + break + } + v.reset(OpAMD64SETAstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (ROLL x (MOVQconst [c])) - // result: (ROLLconst [int8(c&31)] x) + // match: (MOVBstore [off] {sym} ptr y:(SETAE x) mem) + // cond: y.Uses == 1 + // result: (SETAEstore [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SETAE { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + x := y.Args[0] + mem := v_2 + if !(y.Uses == 1) { + break + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (ROLL x (MOVLconst [c])) - // result: (ROLLconst [int8(c&31)] x) + // match: (MOVBstore [off] {sym} ptr (MOVBQSX x) mem) + // result: (MOVBstore [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVBQSX { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64ROLLconst(v *Value) bool { - v_0 := v.Args[0] - // match: (ROLLconst x [0]) - // result: x + // match: (MOVBstore [off] {sym} ptr (MOVBQZX x) mem) + // result: (MOVBstore [off] {sym} ptr x mem) for { - if auxIntToInt8(v.AuxInt) 
!= 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVBQZX { break } - x := v_0 - v.copyOf(x) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64ROLQ(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (ROLQ x (NEGQ y)) - // result: (RORQ x y) + // match: (MOVBstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVBstore [off1+off2] {sym} ptr val mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - y := v_1.Args[0] - v.reset(OpAMD64RORQ) - v.AddArg2(x, y) + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (ROLQ x (NEGL y)) - // result: (RORQ x y) + // match: (MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) + // result: (MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - y := v_1.Args[0] - v.reset(OpAMD64RORQ) - v.AddArg2(x, y) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (ROLQ x (MOVQconst [c])) - // result: (ROLQconst [int8(c&63)] x) + // match: (MOVBstore [off] {sym} ptr (MOVQconst [c]) mem) + // result: (MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem) for { - x := v_0 + off := auxIntToInt32(v.AuxInt) + sym 
:= auxToSym(v.Aux) + ptr := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v.AddArg(x) + mem := v_2 + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (ROLQ x (MOVLconst [c])) - // result: (ROLQconst [int8(c&63)] x) + // match: (MOVBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v.AddArg(x) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64ROLQconst(v *Value) bool { - v_0 := v.Args[0] - // match: (ROLQconst x [0]) - // result: x + // match: (MOVBstore [off] {sym} ptr (KMOVBi mask) mem) + // result: (KMOVBstore [off] {sym} ptr mask mem) for { - if auxIntToInt8(v.AuxInt) != 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64KMOVBi { break } - x := v_0 - v.copyOf(x) + mask := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64KMOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64ROLW(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { v_1 := v.Args[1] v_0 := 
v.Args[0] - // match: (ROLW x (NEGQ y)) - // result: (RORW x y) + // match: (MOVBstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd32(off) + // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + sc := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - y := v_1.Args[0] - v.reset(OpAMD64RORW) - v.AddArg2(x, y) - return true - } - // match: (ROLW x (NEGL y)) - // result: (RORW x y) - for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + off := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd32(off)) { break } - y := v_1.Args[0] - v.reset(OpAMD64RORW) - v.AddArg2(x, y) + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(s) + v.AddArg2(ptr, mem) return true } - // match: (ROLW x (MOVQconst [c])) - // result: (ROLWconst [int8(c&15)] x) + // match: (MOVBstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) + // result: (MOVBstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + sc := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLWconst) - v.AuxInt = int8ToAuxInt(int8(c & 15)) - v.AddArg(x) - return true - } - // match: (ROLW x (MOVLconst [c])) - // result: (ROLWconst [int8(c&15)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + off := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLWconst) - v.AuxInt = int8ToAuxInt(int8(c & 15)) - v.AddArg(x) + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + 
v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64ROLWconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool { v_0 := v.Args[0] - // match: (ROLWconst x [0]) - // result: x + b := v.Block + // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) for { - if auxIntToInt8(v.AuxInt) != 0 { + x := v_0 + if x.Op != OpAMD64MOVLload { break } - x := v_0 - v.copyOf(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64RORB(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (RORB x (NEGQ y)) - // result: (ROLB x y) - for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLB) - v.AddArg2(x, y) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (RORB x (NEGL y)) - // result: (ROLB x y) + // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) for { x := v_0 - if v_1.Op != OpAMD64NEGL { + if x.Op != OpAMD64MOVQload { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLB) - v.AddArg2(x, y) - return true - } - // match: (RORB x (MOVQconst [c])) - // result: (ROLBconst [int8((-c)&7) ] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLBconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 7)) - v.AddArg(x) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) + v.copyOf(v0) + 
v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (RORB x (MOVLconst [c])) - // result: (ROLBconst [int8((-c)&7) ] x) + // match: (MOVLQSX (ANDLconst [c] x)) + // cond: uint32(c) & 0x80000000 == 0 + // result: (ANDLconst [c & 0x7fffffff] x) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64ANDLconst { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLBconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 7)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64RORL(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (RORL x (NEGQ y)) - // result: (ROLL x y) - for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(uint32(c)&0x80000000 == 0) { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLL) - v.AddArg2(x, y) + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(c & 0x7fffffff) + v.AddArg(x) return true } - // match: (RORL x (NEGL y)) - // result: (ROLL x y) + // match: (MOVLQSX (MOVLQSX x)) + // result: (MOVLQSX x) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + if v_0.Op != OpAMD64MOVLQSX { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLL) - v.AddArg2(x, y) + x := v_0.Args[0] + v.reset(OpAMD64MOVLQSX) + v.AddArg(x) return true } - // match: (RORL x (MOVQconst [c])) - // result: (ROLLconst [int8((-c)&31)] x) + // match: (MOVLQSX (MOVWQSX x)) + // result: (MOVWQSX x) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64MOVWQSX { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLLconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 31)) + x := v_0.Args[0] + v.reset(OpAMD64MOVWQSX) v.AddArg(x) return true } - // match: (RORL x (MOVLconst [c])) - // result: (ROLLconst [int8((-c)&31)] x) + // match: (MOVLQSX (MOVBQSX x)) + // result: (MOVBQSX x) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64MOVBQSX { break } - c := auxIntToInt32(v_1.AuxInt) - 
v.reset(OpAMD64ROLLconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 31)) + x := v_0.Args[0] + v.reset(OpAMD64MOVBQSX) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64RORQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVLQSXload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RORQ x (NEGQ y)) - // result: (ROLQ x y) + b := v.Block + config := b.Func.Config + // match: (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVLQSX x) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLstore { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLQ) - v.AddArg2(x, y) - return true - } - // match: (RORQ x (NEGL y)) - // result: (ROLQ x y) - for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLQ) - v.AddArg2(x, y) + v.reset(OpAMD64MOVLQSX) + v.AddArg(x) return true } - // match: (RORQ x (MOVQconst [c])) - // result: (ROLQconst [int8((-c)&63)] x) + // match: (MOVLQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVLQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLQconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 63)) - v.AddArg(x) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVLQSXload) + v.AuxInt = int32ToAuxInt(off1 + off2) + 
v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (RORQ x (MOVLconst [c])) - // result: (ROLQconst [int8((-c)&63)] x) + // match: (MOVLQSXload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVQconst [int64(int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))]) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym)) { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLQconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 63)) - v.AddArg(x) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder)))) return true } return false } -func rewriteValueAMD64_OpAMD64RORW(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool { v_0 := v.Args[0] - // match: (RORW x (NEGQ y)) - // result: (ROLW x y) + b := v.Block + // match: (MOVLQZX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVLload [off] {sym} ptr mem) for { x := v_0 - if v_1.Op != OpAMD64NEGQ { + if x.Op != OpAMD64MOVLload { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLW) - v.AddArg2(x, y) + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (RORW x (NEGL y)) - // result: (ROLW x y) + // match: (MOVLQZX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVLload [off] {sym} ptr mem) for { x := v_0 - if v_1.Op != OpAMD64NEGL { + if x.Op != OpAMD64MOVQload { break } - y := v_1.Args[0] - v.reset(OpAMD64ROLW) - v.AddArg2(x, y) + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := 
x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVLload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (RORW x (MOVQconst [c])) - // result: (ROLWconst [int8((-c)&15)] x) + // match: (MOVLQZX (ANDLconst [c] x)) + // result: (ANDLconst [c] x) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64ANDLconst { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64ROLWconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 15)) + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(c) v.AddArg(x) return true } - // match: (RORW x (MOVLconst [c])) - // result: (ROLWconst [int8((-c)&15)] x) + // match: (MOVLQZX (MOVLQZX x)) + // result: (MOVLQZX x) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64MOVLQZX { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64ROLWconst) - v.AuxInt = int8ToAuxInt(int8((-c) & 15)) + x := v_0.Args[0] + v.reset(OpAMD64MOVLQZX) v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64SARB(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SARB x (MOVQconst [c])) - // result: (SARBconst [int8(min(int64(c)&31,7))] x) + // match: (MOVLQZX (MOVWQZX x)) + // result: (MOVWQZX x) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64MOVWQZX { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SARBconst) - v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 7))) + x := v_0.Args[0] + v.reset(OpAMD64MOVWQZX) v.AddArg(x) return true } - // match: (SARB x (MOVLconst [c])) - // result: (SARBconst [int8(min(int64(c)&31,7))] x) + // match: (MOVLQZX (MOVBQZX x)) + // result: (MOVBQZX x) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64MOVBQZX { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SARBconst) - v.AuxInt = 
int8ToAuxInt(int8(min(int64(c)&31, 7))) + x := v_0.Args[0] + v.reset(OpAMD64MOVBQZX) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64SARBconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVLatomicload(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SARBconst x [0]) - // result: x + // match: (MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVLatomicload [off1+off2] {sym} ptr mem) for { - if auxIntToInt8(v.AuxInt) != 0 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - x := v_0 - v.copyOf(x) + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVLatomicload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SARBconst [c] (MOVQconst [d])) - // result: (MOVQconst [int64(int8(d))>>uint64(c)]) + // match: (MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVLatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem) for { - c := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(int8(d)) >> uint64(c)) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVLatomicload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SARL(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64MOVLf2i(v *Value) bool { v_0 
:= v.Args[0] b := v.Block - // match: (SARL x (MOVQconst [c])) - // result: (SARLconst [int8(c&31)] x) + // match: (MOVLf2i (Arg [off] {sym})) + // cond: t.Size() == u.Size() + // result: @b.Func.Entry (Arg [off] {sym}) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + t := v.Type + if v_0.Op != OpArg { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SARLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) - return true - } - // match: (SARL x (MOVLconst [c])) - // result: (SARLconst [int8(c&31)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + u := v_0.Type + off := auxIntToInt32(v_0.AuxInt) + sym := auxToSym(v_0.Aux) + if !(t.Size() == u.Size()) { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SARLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + b = b.Func.Entry + v0 := b.NewValue0(v.Pos, OpArg, t) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) return true } - // match: (SARL x (ADDQconst [c] y)) - // cond: c & 31 == 0 - // result: (SARL x y) + return false +} +func rewriteValueAMD64_OpAMD64MOVLi2f(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVLi2f (Arg [off] {sym})) + // cond: t.Size() == u.Size() + // result: @b.Func.Entry (Arg [off] {sym}) for { - x := v_0 - if v_1.Op != OpAMD64ADDQconst { + t := v.Type + if v_0.Op != OpArg { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 0) { + u := v_0.Type + off := auxIntToInt32(v_0.AuxInt) + sym := auxToSym(v_0.Aux) + if !(t.Size() == u.Size()) { break } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + b = b.Func.Entry + v0 := b.NewValue0(v.Pos, OpArg, t) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) return true } - // match: (SARL x (NEGQ (ADDQconst [c] y))) - // cond: c & 31 == 0 - // result: (SARL x (NEGQ y)) + return false +} +func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: 
(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVLQZX x) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLstore { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 0) { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpAMD64SARL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVLQZX) + v.AddArg(x) return true } - // match: (SARL x (ANDQconst [c] y)) - // cond: c & 31 == 31 - // result: (SARL x y) + // match: (MOVLload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVLload [off1+off2] {sym} ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64ANDQconst { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 31) { + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + v.reset(OpAMD64MOVLload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SARL x (NEGQ (ANDQconst [c] y))) - // cond: c & 31 == 31 - // result: (SARL x (NEGQ y)) + // match: (MOVLload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDQconst { + 
off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 31) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SARL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVLload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (SARL x (ADDLconst [c] y)) - // cond: c & 31 == 0 - // result: (SARL x y) + // match: (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) + // result: (MOVLf2i val) for { - x := v_0 - if v_1.Op != OpAMD64ADDLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVSSstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 0) { + val := v_1.Args[1] + if ptr != v_1.Args[0] { break } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + v.reset(OpAMD64MOVLf2i) + v.AddArg(val) return true } - // match: (SARL x (NEGL (ADDLconst [c] y))) - // cond: c & 31 == 0 - // result: (SARL x (NEGL y)) + // match: (MOVLload [off] {sym} (SB) _) + // cond: symIsRO(sym) && is32BitInt(t) + // result: (MOVLconst [int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDLconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 0) { + t := v.Type + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym) && is32BitInt(t)) { break } - v.reset(OpAMD64SARL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVLconst) + 
v.AuxInt = int32ToAuxInt(int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))) return true } - // match: (SARL x (ANDLconst [c] y)) - // cond: c & 31 == 31 - // result: (SARL x y) + // match: (MOVLload [off] {sym} (SB) _) + // cond: symIsRO(sym) && is64BitInt(t) + // result: (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))]) for { - x := v_0 - if v_1.Op != OpAMD64ANDLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 31) { + t := v.Type + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym) && is64BitInt(t)) { break } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))) return true } - // match: (SARL x (NEGL (ANDLconst [c] y))) - // cond: c & 31 == 31 - // result: (SARL x (NEGL y)) + return false +} +func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) + // result: (MOVLstore [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDLconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 31) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLQSX { break } - v.reset(OpAMD64SARL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (SARL l:(MOVLload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SARXLload [off] {sym} ptr x mem) + // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem) + // result: (MOVLstore [off] {sym} ptr x mem) 
for { - l := v_0 - if l.Op != OpAMD64MOVLload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLQZX { break } - v.reset(OpAMD64SARXLload) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVLstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SARLconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SARLconst x [0]) - // result: x + // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVLstore [off1+off2] {sym} ptr val mem) for { - if auxIntToInt8(v.AuxInt) != 0 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - x := v_0 - v.copyOf(x) + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (SARLconst [c] (MOVQconst [d])) - // result: (MOVQconst [int64(int32(d))>>uint64(c)]) + // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) + // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) for { - c := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(int32(d)) >> uint64(c)) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return 
true } - return false -} -func rewriteValueAMD64_OpAMD64SARQ(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SARQ x (MOVQconst [c])) - // result: (SARQconst [int8(c&63)] x) + // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem) + // result: (MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) for { - x := v_0 + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SARQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v.AddArg(x) + mem := v_2 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SARQ x (MOVLconst [c])) - // result: (SARQconst [int8(c&63)] x) + // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SARQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v.AddArg(x) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (SARQ x (ADDQconst [c] y)) - // cond: c & 63 == 0 - // result: (SARQ x y) + // match: (MOVLstore {sym} [off] ptr y:(ADDLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ADDLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64ADDQconst { + off := auxIntToInt32(v.AuxInt) + sym := 
auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ADDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&63 == 0) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + v.reset(OpAMD64ADDLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (SARQ x (NEGQ (ADDQconst [c] y))) - // cond: c & 63 == 0 - // result: (SARQ x (NEGQ y)) + // match: (MOVLstore {sym} [off] ptr y:(ANDLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ANDLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ANDLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 0) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64SARQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64ANDLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (SARQ x (ANDQconst [c] y)) - // cond: c & 63 == 63 - // result: (SARQ x y) + // match: (MOVLstore {sym} [off] ptr y:(ORLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ORLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64ANDQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - c := auxIntToInt32(v_1.AuxInt) - y := 
v_1.Args[0] - if !(c&63 == 63) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + v.reset(OpAMD64ORLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (SARQ x (NEGQ (ANDQconst [c] y))) - // cond: c & 63 == 63 - // result: (SARQ x (NEGQ y)) + // match: (MOVLstore {sym} [off] ptr y:(XORLload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (XORLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64XORLload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 63) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64SARQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64XORLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (SARQ x (ADDLconst [c] y)) - // cond: c & 63 == 0 - // result: (SARQ x y) + // match: (MOVLstore {sym} [off] ptr y:(ADDL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ADDLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64ADDLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ADDL { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&63 == 0) { - break + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVLload || 
auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ADDLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) - return true + break } - // match: (SARQ x (NEGL (ADDLconst [c] y))) - // cond: c & 63 == 0 - // result: (SARQ x (NEGL y)) + // match: (MOVLstore {sym} [off] ptr y:(SUBL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (SUBLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SUBL { break } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDLconst { + x := y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 0) { + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { break } - v.reset(OpAMD64SARQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64SUBLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (SARQ x (ANDLconst [c] y)) - // cond: c & 63 == 63 - // result: (SARQ x y) + // match: (MOVLstore {sym} [off] ptr y:(ANDL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ANDLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64ANDLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ANDL { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&63 == 
63) { - break + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ANDLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) - return true + break } - // match: (SARQ x (NEGL (ANDLconst [c] y))) - // cond: c & 63 == 63 - // result: (SARQ x (NEGL y)) + // match: (MOVLstore {sym} [off] ptr y:(ORL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ORLmodify [off] {sym} ptr x mem) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ORL { break } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDLconst { - break + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ORLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 63) { + break + } + // match: (MOVLstore {sym} [off] ptr y:(XORL l:(MOVLload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (XORLmodify [off] {sym} ptr x mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if 
y.Op != OpAMD64XORL { break } - v.reset(OpAMD64SARQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) - return true + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + break } - // match: (SARQ l:(MOVQload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SARXQload [off] {sym} ptr x mem) + // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (ADDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - l := v_0 - if l.Op != OpAMD64MOVQload { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { break } - v.reset(OpAMD64SARXQload) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64ADDLconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.AddArg2(ptr, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SARQconst(v 
*Value) bool { - v_0 := v.Args[0] - // match: (SARQconst x [0]) - // result: x + // match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (ANDLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if auxIntToInt8(v.AuxInt) != 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ANDLconst { break } - x := v_0 - v.copyOf(x) - return true - } - // match: (SARQconst [c] (MOVQconst [d])) - // result: (MOVQconst [d>>uint64(c)]) - for { - c := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(d >> uint64(c)) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SARW(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SARW x (MOVQconst [c])) - // result: (SARWconst [int8(min(int64(c)&31,15))] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SARWconst) - v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 15))) - v.AddArg(x) + v.reset(OpAMD64ANDLconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SARW x (MOVLconst [c])) - // result: (SARWconst [int8(min(int64(c)&31,15))] x) + // match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (ORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - x := v_0 - if 
v_1.Op != OpAMD64MOVLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ORLconst { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SARWconst) - v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 15))) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SARWconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SARWconst x [0]) - // result: x - for { - if auxIntToInt8(v.AuxInt) != 0 { + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - x := v_0 - v.copyOf(x) + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + break + } + v.reset(OpAMD64ORLconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SARWconst [c] (MOVQconst [d])) - // result: (MOVQconst [int64(int16(d))>>uint64(c)]) + // match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (XORLconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - c := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64XORLconst { break } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(int16(d)) >> uint64(c)) - return true + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + break + } + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + break + } + v.reset(OpAMD64XORLconstmodify) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true } - return false -} -func rewriteValueAMD64_OpAMD64SARXLload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SARXLload [off] {sym} ptr (MOVLconst [c]) mem) - // result: (SARLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) + // match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) + // result: (MOVSSstore [off] {sym} ptr val mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_1.Op != OpAMD64MOVLf2i { break } - c := auxIntToInt32(v_1.AuxInt) + val := v_1.Args[0] mem := v_2 - v.reset(OpAMD64SARLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg(v0) + v.reset(OpAMD64MOVSSstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SARXQload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SARXQload [off] {sym} ptr (MOVQconst [c]) mem) - // result: (SARQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + // match: (MOVLstore [i] {s} p x:(BSWAPL w) mem) + // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3 + // result: (MOVBELstore [i] {s} p w mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64BSWAPL { break } - c := auxIntToInt64(v_1.AuxInt) + w := x.Args[0] mem := v_2 - v.reset(OpAMD64SARQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - 
v0.AddArg2(ptr, mem) - v.AddArg(v0) + if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64MOVBELstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) return true } - // match: (SARXQload [off] {sym} ptr (MOVLconst [c]) mem) - // result: (SARQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + // match: (MOVLstore [off] {sym} ptr (KMOVDi mask) mem) + // result: (KMOVDstore [off] {sym} ptr mask mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_1.Op != OpAMD64KMOVDi { break } - c := auxIntToInt32(v_1.AuxInt) + mask := v_1.Args[0] mem := v_2 - v.reset(OpAMD64SARQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg(v0) + v.reset(OpAMD64KMOVDstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SBBLcarrymask (FlagEQ)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagEQ { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true - } - // match: (SBBLcarrymask (FlagLT_ULT)) - // result: (MOVLconst [-1]) + // match: (MOVLstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd32(off) + // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) for { - if v_0.Op != OpAMD64FlagLT_ULT { + sc := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(-1) - return true - } - // match: (SBBLcarrymask (FlagLT_UGT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagLT_UGT { + off := auxIntToInt32(v_0.AuxInt) + ptr 
:= v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(s) + v.AddArg2(ptr, mem) return true } - // match: (SBBLcarrymask (FlagGT_ULT)) - // result: (MOVLconst [-1]) + // match: (MOVLstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) + // result: (MOVLstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) for { - if v_0.Op != OpAMD64FlagGT_ULT { + sc := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(-1) - return true - } - // match: (SBBLcarrymask (FlagGT_UGT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagGT_UGT { + off := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SBBQ(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SBBQ x (MOVQconst [c]) borrow) - // cond: is32Bit(c) - // result: (SBBQconst x [int32(c)] borrow) + // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVOload [off1+off2] {sym} ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - c := auxIntToInt64(v_1.AuxInt) - borrow := v_2 - if !(is32Bit(c)) { + off2 := 
auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SBBQconst) - v.AuxInt = int32ToAuxInt(int32(c)) - v.AddArg2(x, borrow) + v.reset(OpAMD64MOVOload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SBBQ x y (FlagEQ)) - // result: (SUBQborrow x y) + // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - x := v_0 - y := v_1 - if v_2.Op != OpAMD64FlagEQ { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64SUBQborrow) - v.AddArg2(x, y) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVOload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SBBQcarrymask(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SBBQcarrymask (FlagEQ)) - // result: (MOVQconst [0]) + b := v.Block + config := b.Func.Config + typ := &b.Func.Config.Types + // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVOstore [off1+off2] {sym} ptr val mem) for { - if v_0.Op != OpAMD64FlagEQ { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(0) - return true - } - // match: (SBBQcarrymask (FlagLT_ULT)) - // result: (MOVQconst [-1]) - for { - if v_0.Op != OpAMD64FlagLT_ULT { + off2 := auxIntToInt32(v_0.AuxInt) + ptr := 
v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(-1) + v.reset(OpAMD64MOVOstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (SBBQcarrymask (FlagLT_UGT)) - // result: (MOVQconst [0]) + // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - if v_0.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(0) - return true - } - // match: (SBBQcarrymask (FlagGT_ULT)) - // result: (MOVQconst [-1]) - for { - if v_0.Op != OpAMD64FlagGT_ULT { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(-1) + v.reset(OpAMD64MOVOstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (SBBQcarrymask (FlagGT_UGT)) - // result: (MOVQconst [0]) + // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) + // cond: symIsRO(srcSym) + // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))]) mem)) for { - if v_0.Op != OpAMD64FlagGT_UGT { + dstOff := auxIntToInt32(v.AuxInt) + dstSym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVOload { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(0) + srcOff := auxIntToInt32(v_1.AuxInt) + srcSym := 
auxToSym(v_1.Aux) + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpSB { + break + } + mem := v_2 + if !(symIsRO(srcSym)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = int32ToAuxInt(dstOff + 8) + v.Aux = symToAux(dstSym) + v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))) + v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem) + v1.AuxInt = int32ToAuxInt(dstOff) + v1.Aux = symToAux(dstSym) + v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64) + v2.AuxInt = int64ToAuxInt(int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))) + v1.AddArg3(ptr, v2, mem) + v.AddArg3(ptr, v0, v1) return true } return false } -func rewriteValueAMD64_OpAMD64SBBQconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVOstoreconst(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SBBQconst x [c] (FlagEQ)) - // result: (SUBQconstborrow x [c]) + // match: (MOVOstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd32(off) + // result: (MOVOstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if v_1.Op != OpAMD64FlagEQ { + sc := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64SUBQconstborrow) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SETA(v *Value) bool { - v_0 := v.Args[0] - // match: (SETA (InvertFlags x)) - // result: (SETB x) - for { - if v_0.Op != OpAMD64InvertFlags { + off := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd32(off)) { break } - x := v_0.Args[0] - v.reset(OpAMD64SETB) - v.AddArg(x) + v.reset(OpAMD64MOVOstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(s) + v.AddArg2(ptr, mem) return true } - // match: (SETA (FlagEQ)) - // result: (MOVLconst [0]) + // 
match: (MOVOstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) + // result: (MOVOstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) for { - if v_0.Op != OpAMD64FlagEQ { + sc := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true - } - // match: (SETA (FlagLT_ULT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagLT_ULT { + off := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64MOVOstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return true } - // match: (SETA (FlagLT_UGT)) - // result: (MOVLconst [1]) + return false +} +func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVQatomicload [off1+off2] {sym} ptr mem) for { - if v_0.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - // match: (SETA (FlagGT_ULT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagGT_ULT { + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64MOVQatomicload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETA (FlagGT_UGT)) - // result: (MOVLconst [1]) + // match: 
(MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVQatomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem) for { - if v_0.Op != OpAMD64FlagGT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVQatomicload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool { v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (SETAE (TESTQ x x)) - // result: (ConstBool [true]) + // match: (MOVQf2i (Arg [off] {sym})) + // cond: t.Size() == u.Size() + // result: @b.Func.Entry (Arg [off] {sym}) for { - if v_0.Op != OpAMD64TESTQ { + t := v.Type + if v_0.Op != OpArg { break } - x := v_0.Args[1] - if x != v_0.Args[0] { + u := v_0.Type + off := auxIntToInt32(v_0.AuxInt) + sym := auxToSym(v_0.Aux) + if !(t.Size() == u.Size()) { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(true) + b = b.Func.Entry + v0 := b.NewValue0(v.Pos, OpArg, t) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) return true } - // match: (SETAE (TESTL x x)) - // result: (ConstBool [true]) + return false +} +func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVQi2f (Arg [off] {sym})) + // cond: t.Size() == u.Size() + // result: @b.Func.Entry (Arg [off] {sym}) for { - if v_0.Op != OpAMD64TESTL { + t := v.Type + if v_0.Op != OpArg { break } - x := v_0.Args[1] - if x != v_0.Args[0] { + u := v_0.Type + off := auxIntToInt32(v_0.AuxInt) 
+ sym := auxToSym(v_0.Aux) + if !(t.Size() == u.Size()) { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(true) + b = b.Func.Entry + v0 := b.NewValue0(v.Pos, OpArg, t) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) return true } - // match: (SETAE (TESTW x x)) - // result: (ConstBool [true]) + return false +} +func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: x for { - if v_0.Op != OpAMD64TESTW { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQstore { break } - x := v_0.Args[1] - if x != v_0.Args[0] { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(true) - return true - } - // match: (SETAE (TESTB x x)) - // result: (ConstBool [true]) - for { - if v_0.Op != OpAMD64TESTB { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] { - break - } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(true) - return true - } - // match: (SETAE (BTLconst [0] x)) - // result: (XORLconst [1] (ANDLconst [1] x)) - for { - if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 { - break - } - x := v_0.Args[0] - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(1) - v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool) - v0.AuxInt = int32ToAuxInt(1) - v0.AddArg(x) - v.AddArg(v0) - return true - } - // match: (SETAE (BTQconst [0] x)) - // result: (XORLconst [1] (ANDLconst [1] x)) - for { - if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 { - break - } - x := v_0.Args[0] - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(1) - v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool) - 
v0.AuxInt = int32ToAuxInt(1) - v0.AddArg(x) - v.AddArg(v0) - return true - } - // match: (SETAE c:(CMPQconst [128] x)) - // cond: c.Uses == 1 - // result: (SETA (CMPQconst [127] x)) - for { - c := v_0 - if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { - break - } - x := c.Args[0] - if !(c.Uses == 1) { - break - } - v.reset(OpAMD64SETA) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) - return true - } - // match: (SETAE c:(CMPLconst [128] x)) - // cond: c.Uses == 1 - // result: (SETA (CMPLconst [127] x)) - for { - c := v_0 - if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { - break - } - x := c.Args[0] - if !(c.Uses == 1) { - break - } - v.reset(OpAMD64SETA) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) - return true - } - // match: (SETAE (InvertFlags x)) - // result: (SETBE x) - for { - if v_0.Op != OpAMD64InvertFlags { - break - } - x := v_0.Args[0] - v.reset(OpAMD64SETBE) - v.AddArg(x) - return true - } - // match: (SETAE (FlagEQ)) - // result: (MOVLconst [1]) - for { - if v_0.Op != OpAMD64FlagEQ { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - // match: (SETAE (FlagLT_ULT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagLT_ULT { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true - } - // match: (SETAE (FlagLT_UGT)) - // result: (MOVLconst [1]) - for { - if v_0.Op != OpAMD64FlagLT_UGT { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - // match: (SETAE (FlagGT_ULT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagGT_ULT { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true - } - // match: (SETAE (FlagGT_UGT)) - // result: (MOVLconst [1]) - for { - if v_0.Op != OpAMD64FlagGT_UGT { - break - } - 
v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SETAEstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETAEstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETBEstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETBEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.copyOf(x) return true } - // match: (SETAEstore [off1] {sym} (ADDQconst [off2] base) val mem) + // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETAEstore [off1+off2] {sym} base val mem) + // result: (MOVQload [off1+off2] {sym} ptr mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -21922,21 +26317,20 @@ func rewriteValueAMD64_OpAMD64SETAEstore(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 + ptr := v_0.Args[0] + mem := v_1 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SETAEstore) + v.reset(OpAMD64MOVQload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (SETAEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETAEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -21946,157 +26340,98 @@ func rewriteValueAMD64_OpAMD64SETAEstore(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) 
base := v_0.Args[0] - val := v_1 - mem := v_2 + mem := v_1 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETAEstore) + v.reset(OpAMD64MOVQload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) - return true - } - // match: (SETAEstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.AddArg2(base, mem) return true } - // match: (SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) + // result: (MOVQf2i val) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + if v_1.Op != OpAMD64MOVSDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) - // 
result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + val := v_1.Args[1] + if ptr != v_1.Args[0] { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64MOVQf2i) + v.AddArg(val) return true } - // match: (SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVQload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))]) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + if v_0.Op != OpSB || !(symIsRO(sym)) { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))) return true } return false } -func rewriteValueAMD64_OpAMD64SETAstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETAstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETBstore [off] {sym} ptr x mem) + // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVQstore [off1+off2] {sym} ptr val mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { + if v_0.Op != OpAMD64ADDQconst { break } - x := v_1.Args[0] + off2 := 
auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (SETAstore [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETAstore [off1+off2] {sym} base val mem) + // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) + // cond: validVal(c) + // result: (MOVQstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem) for { - off1 := auxIntToInt32(v.AuxInt) + off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 + c := auxIntToInt64(v_1.AuxInt) mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + if !(validVal(c)) { break } - v.reset(OpAMD64SETAstore) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (SETAstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETAstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -22111,379 +26446,616 @@ func rewriteValueAMD64_OpAMD64SETAstore(v *Value) bool { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETAstore) + v.reset(OpAMD64MOVQstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, 
sym2)) v.AddArg3(base, val, mem) return true } - // match: (SETAstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ADDQmodify [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + y := v_1 + if y.Op != OpAMD64ADDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ADDQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (SETAstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ANDQmodify [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { + y := v_1 + if y.Op != OpAMD64ANDQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ANDQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (SETAstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} 
ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ORQmodify [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + y := v_1 + if y.Op != OpAMD64ORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64ORQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (SETAstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (XORQmodify [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + y := v_1 + if y.Op != OpAMD64XORQload || auxIntToInt32(y.AuxInt) != off || auxToSym(y.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + break + } + v.reset(OpAMD64XORQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (SETAstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ADDQmodify [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT 
{ + y := v_1 + if y.Op != OpAMD64ADDQ { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ADDQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64SETB(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (SETB (TESTQ x x)) - // result: (ConstBool [false]) + // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (SUBQmodify [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64TESTQ { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SUBQ { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(false) - return true - } - // match: (SETB (TESTL x x)) - // result: (ConstBool [false]) - for { - if v_0.Op != OpAMD64TESTL { + x := y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - x := v_0.Args[1] - if x != v_0.Args[0] { + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(false) + v.reset(OpAMD64SUBQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return 
true } - // match: (SETB (TESTW x x)) - // result: (ConstBool [false]) + // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ANDQmodify [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64TESTW { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ANDQ { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(false) - return true + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ANDQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + break } - // match: (SETB (TESTB x x)) - // result: (ConstBool [false]) + // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ORQmodify [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64TESTB { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ORQ { break } - x := v_0.Args[1] - if x != v_0.Args[0] { + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + 
return true + } + break + } + // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (XORQmodify [off] {sym} ptr x mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + y := v_1 + if y.Op != OpAMD64XORQ { break } - v.reset(OpConstBool) - v.AuxInt = boolToAuxInt(false) - return true + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64XORQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + break } - // match: (SETB (BTLconst [0] x)) - // result: (ANDLconst [1] x) + // match: (MOVQstore {sym} [off] ptr x:(BTSQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) + // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l) + // result: (BTSQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + x := v_1 + if x.Op != OpAMD64BTSQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(1) - v.AddArg(x) + c := auxIntToInt8(x.AuxInt) + l := x.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + break + } + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) { + break + } + v.reset(OpAMD64BTSQconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB (BTQconst [0] x)) - // result: (ANDQconst [1] x) 
+ // match: (MOVQstore {sym} [off] ptr x:(BTRQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) + // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l) + // result: (BTRQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + x := v_1 + if x.Op != OpAMD64BTRQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64ANDQconst) - v.AuxInt = int32ToAuxInt(1) - v.AddArg(x) + c := auxIntToInt8(x.AuxInt) + l := x.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + break + } + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) { + break + } + v.reset(OpAMD64BTRQconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB c:(CMPQconst [128] x)) - // cond: c.Uses == 1 - // result: (SETBE (CMPQconst [127] x)) + // match: (MOVQstore {sym} [off] ptr x:(BTCQconst [c] l:(MOVQload {sym} [off] ptr mem)) mem) + // cond: x.Uses == 1 && l.Uses == 1 && clobber(x, l) + // result: (BTCQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - c := v_0 - if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + x := v_1 + if x.Op != OpAMD64BTCQconst { break } - x := c.Args[0] - if !(c.Uses == 1) { + c := auxIntToInt8(x.AuxInt) + l := x.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(x.Uses == 1 && l.Uses == 1 && clobber(x, l)) { + break + } + v.reset(OpAMD64BTCQconstmodify) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB c:(CMPLconst [128] x)) - // cond: c.Uses == 1 - // result: (SETBE (CMPLconst [127] x)) + // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (ADDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - c := v_0 - if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ADDQconst { break } - x := c.Args[0] - if !(c.Uses == 1) { + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { break } - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + break + } + v.reset(OpAMD64ADDQconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB (InvertFlags x)) - // result: (SETA x) + // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (ANDQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if v_0.Op != OpAMD64InvertFlags { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ANDQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETA) - v.AddArg(x) + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + break + } + mem := l.Args[1] + 
ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + break + } + v.reset(OpAMD64ANDQconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB (FlagEQ)) - // result: (MOVLconst [0]) + // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (ORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if v_0.Op != OpAMD64FlagEQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ORQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + break + } + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + break + } + v.reset(OpAMD64ORQconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB (FlagLT_ULT)) - // result: (MOVLconst [1]) + // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) + // result: (XORQconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) for { - if v_0.Op != OpAMD64FlagLT_ULT { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + a := v_1 + if a.Op != OpAMD64XORQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + c := auxIntToInt32(a.AuxInt) + l := a.Args[0] + if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { + break + } + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || 
!(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a)) { + break + } + v.reset(OpAMD64XORQconstmodify) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(c), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETB (FlagLT_UGT)) - // result: (MOVLconst [0]) + // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) + // result: (MOVSDstore [off] {sym} ptr val mem) for { - if v_0.Op != OpAMD64FlagLT_UGT { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQf2i { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + val := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVSDstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (SETB (FlagGT_ULT)) - // result: (MOVLconst [1]) + // match: (MOVQstore [i] {s} p x:(BSWAPQ w) mem) + // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3 + // result: (MOVBEQstore [i] {s} p w mem) for { - if v_0.Op != OpAMD64FlagGT_ULT { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64BSWAPQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64MOVBEQstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) return true } - // match: (SETB (FlagGT_UGT)) - // result: (MOVLconst [0]) + // match: (MOVQstore [off] {sym} ptr (KMOVQi mask) mem) + // result: (KMOVQstore [off] {sym} ptr mask mem) for { - if v_0.Op != OpAMD64FlagGT_UGT { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64KMOVQi { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + mask := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64KMOVQstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SETBE(v 
*Value) bool { +func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SETBE (InvertFlags x)) - // result: (SETAE x) + // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd32(off) + // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) for { - if v_0.Op != OpAMD64InvertFlags { + sc := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETAE) - v.AddArg(x) - return true - } - // match: (SETBE (FlagEQ)) - // result: (MOVLconst [1]) - for { - if v_0.Op != OpAMD64FlagEQ { + off := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(s) + v.AddArg2(ptr, mem) return true } - // match: (SETBE (FlagLT_ULT)) - // result: (MOVLconst [1]) + // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) + // result: (MOVQstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) for { - if v_0.Op != OpAMD64FlagLT_ULT { + sc := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - // match: (SETBE (FlagLT_UGT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagLT_UGT { + off := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return 
true } - // match: (SETBE (FlagGT_ULT)) - // result: (MOVLconst [1]) + // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) + // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) for { - if v_0.Op != OpAMD64FlagGT_ULT { + c := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + p1 := v_0 + x := v_1 + if x.Op != OpAMD64MOVQstoreconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + a := auxIntToValAndOff(x.AuxInt) + if auxToSym(x.Aux) != s { + break + } + mem := x.Args[1] + p0 := x.Args[0] + if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) { + break + } + v.reset(OpAMD64MOVOstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off())) + v.Aux = symToAux(s) + v.AddArg2(p0, mem) return true } - // match: (SETBE (FlagGT_UGT)) - // result: (MOVLconst [0]) + // match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x) + // result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem) for { - if v_0.Op != OpAMD64FlagGT_UGT { + a := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + p0 := v_0 + x := v_1 + if x.Op != OpAMD64MOVQstoreconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + c := auxIntToValAndOff(x.AuxInt) + if auxToSym(x.Aux) != s { + break + } + mem := x.Args[1] + p1 := x.Args[0] + if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && setPos(v, x.Pos) && clobber(x)) { + break + } + v.reset(OpAMD64MOVOstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off())) + v.Aux = symToAux(s) + v.AddArg2(p0, mem) return true } return 
false } -func rewriteValueAMD64_OpAMD64SETBEstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETBEstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETAEstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETBEstore [off1] {sym} (ADDQconst [off2] base) val mem) + // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETBEstore [off1+off2] {sym} base val mem) + // result: (MOVSDload [off1+off2] {sym} ptr mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -22491,21 +27063,20 @@ func rewriteValueAMD64_OpAMD64SETBEstore(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 + ptr := v_0.Args[0] + mem := v_1 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SETBEstore) + v.reset(OpAMD64MOVSDload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (SETBEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETBEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -22515,135 +27086,134 @@ func rewriteValueAMD64_OpAMD64SETBEstore(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] - val := v_1 - 
mem := v_2 + mem := v_1 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETBEstore) + v.reset(OpAMD64MOVSDload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) return true } - // match: (SETBEstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) + // result: (MOVQi2f val) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + if v_1.Op != OpAMD64MOVQstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + val := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpAMD64MOVQi2f) + v.AddArg(val) return true } - // match: (SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVSDstore [off1+off2] {sym} ptr val mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { + if v_0.Op != OpAMD64ADDQconst { break } + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVSDstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v0 := 
b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVSDstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) + // result: (MOVQstore [off] {sym} ptr val mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + if v_1.Op != OpAMD64MOVQi2f { break } + val := v_1.Args[0] mem := v_2 - v.reset(OpAMD64MOVBstore) + v.reset(OpAMD64MOVQstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr 
(MOVLconst [0]) mem) + // match: (MOVSDstore [off] {sym} ptr (MOVSDconst [f]) mem) + // cond: f == f + // result: (MOVQstore [off] {sym} ptr (MOVQconst [int64(math.Float64bits(f))]) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + if v_1.Op != OpAMD64MOVSDconst { break } + f := auxIntToFloat64(v_1.AuxInt) mem := v_2 - v.reset(OpAMD64MOVBstore) + if !(f == f) { + break + } + v.reset(OpAMD64MOVQstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(int64(math.Float64bits(f))) v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SETBstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETBstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETAstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETAstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETBstore [off1] {sym} (ADDQconst [off2] base) val mem) + // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETBstore [off1+off2] {sym} base val mem) + // result: (MOVSSload [off1+off2] {sym} ptr mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -22651,21 +27221,20 @@ func rewriteValueAMD64_OpAMD64SETBstore(v *Value) bool { break } off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 + ptr := v_0.Args[0] + mem := v_1 if !(is32Bit(int64(off1) + int64(off2))) { break } - 
v.reset(OpAMD64SETBstore) + v.reset(OpAMD64MOVSSload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (SETBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -22675,1556 +27244,1393 @@ func rewriteValueAMD64_OpAMD64SETBstore(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] - val := v_1 - mem := v_2 + mem := v_1 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETBstore) + v.reset(OpAMD64MOVSSload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) return true } - // match: (SETBstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) + // result: (MOVLi2f val) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + if v_1.Op != OpAMD64MOVLstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + val := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpAMD64MOVLi2f) + v.AddArg(val) return true } - // match: (SETBstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVSSstore(v 
*Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVSSstore [off1+off2] {sym} ptr val mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { + if v_0.Op != OpAMD64ADDQconst { break } + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVSSstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (SETBstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVSSstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } 
- // match: (SETBstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) + // result: (MOVLstore [off] {sym} ptr val mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + if v_1.Op != OpAMD64MOVLi2f { break } + val := v_1.Args[0] mem := v_2 - v.reset(OpAMD64MOVBstore) + v.reset(OpAMD64MOVLstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (SETBstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (MOVSSstore [off] {sym} ptr (MOVSSconst [f]) mem) + // cond: f == f + // result: (MOVLstore [off] {sym} ptr (MOVLconst [int32(math.Float32bits(f))]) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + if v_1.Op != OpAMD64MOVSSconst { break } + f := auxIntToFloat32(v_1.AuxInt) mem := v_2 - v.reset(OpAMD64MOVBstore) + if !(f == f) { + break + } + v.reset(OpAMD64MOVLstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(int32(math.Float32bits(f))) v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (SETEQ (TESTL (SHLL (MOVLconst [1]) x) y)) - // result: (SETAE (BTL x y)) + // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if x.Op != OpAMD64MOVWload { break } - _ 
= v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLL { - continue - } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { - continue - } - y := v_0_1 - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break } - break + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y)) - // result: (SETAE (BTQ x y)) + // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if x.Op != OpAMD64MOVLload { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLQ { - continue - } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { - continue - } - y := v_0_1 - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break } - break + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - // match: (SETEQ (TESTLconst [c] x)) - // cond: isPowerOfTwo(uint32(c)) - // result: (SETAE (BTLconst 
[int8(log32u(uint32(c)))] x)) + // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64TESTLconst { + x := v_0 + if x.Op != OpAMD64MOVQload { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(isPowerOfTwo(uint32(c))) { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg(v0) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (SETEQ (TESTQconst [c] x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETAE (BTQconst [int8(log32u(uint32(c)))] x)) + // match: (MOVWQSX (ANDLconst [c] x)) + // cond: c & 0x8000 == 0 + // result: (ANDLconst [c & 0x7fff] x) for { - if v_0.Op != OpAMD64TESTQconst { + if v_0.Op != OpAMD64ANDLconst { break } c := auxIntToInt32(v_0.AuxInt) x := v_0.Args[0] - if !(isPowerOfTwo(uint64(c))) { + if !(c&0x8000 == 0) { break } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(c & 0x7fff) + v.AddArg(x) return true } - // match: (SETEQ (TESTQ (MOVQconst [c]) x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETAE (BTQconst [int8(log64u(uint64(c)))] x)) + // match: (MOVWQSX (MOVWQSX x)) + // result: (MOVWQSX x) for { - if v_0.Op != OpAMD64TESTQ { + if v_0.Op != OpAMD64MOVWQSX { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64MOVQconst { - continue - } 
- c := auxIntToInt64(v_0_0.AuxInt) - x := v_0_1 - if !(isPowerOfTwo(uint64(c))) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v0.AddArg(x) - v.AddArg(v0) - return true + x := v_0.Args[0] + v.reset(OpAMD64MOVWQSX) + v.AddArg(x) + return true + } + // match: (MOVWQSX (MOVBQSX x)) + // result: (MOVBQSX x) + for { + if v_0.Op != OpAMD64MOVBQSX { + break } - break + x := v_0.Args[0] + v.reset(OpAMD64MOVBQSX) + v.AddArg(x) + return true } - // match: (SETEQ (CMPLconst [1] s:(ANDLconst [1] _))) - // result: (SETNE (CMPLconst [0] s)) + return false +} +func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWQSX x) for { - if v_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_0.AuxInt) != 1 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVWstore { break } - s := v_0.Args[0] - if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg(v0) + v.reset(OpAMD64MOVWQSX) + v.AddArg(x) return true } - // match: (SETEQ (CMPQconst [1] s:(ANDQconst [1] _))) - // result: (SETNE (CMPQconst [0] s)) + // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - if v_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_0.AuxInt) != 1 { + off1 := 
auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - s := v_0.Args[0] - if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg(v0) + v.reset(OpAMD64MOVWQSXload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (SETEQ (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) - // cond: z1==z2 - // result: (SETAE (BTQconst [63] x)) + // match: (MOVWQSXload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVQconst [int64(int16(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))]) for { - if v_0.Op != OpAMD64TESTQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym)) { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(int16(read16(sym, int64(off), config.ctxt.Arch.ByteOrder)))) + return true } - // match: (SETEQ (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) - // cond: z1==z2 - // result: (SETAE (BTQconst [31] x)) + return false +} +func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool { + v_0 := 
v.Args[0] + b := v.Block + // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if x.Op != OpAMD64MOVWload { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break - } - // match: (SETEQ (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) - // cond: z1==z2 - // result: (SETAE (BTQconst [0] x)) - for { - if v_0.Op != OpAMD64TESTQ { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - // match: (SETEQ (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) - // cond: z1==z2 - // result: (SETAE 
(BTLconst [0] x)) + // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if x.Op != OpAMD64MOVLload { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg(v0) - return true + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break } - break + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - // match: (SETEQ (TESTQ z1:(SHRQconst [63] x) z2)) - // cond: z1==z2 - // result: (SETAE (BTQconst [63] x)) + // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if x.Op != OpAMD64MOVQload { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg(v0) - return true + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := 
x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break } - break + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - // match: (SETEQ (TESTL z1:(SHRLconst [31] x) z2)) - // cond: z1==z2 - // result: (SETAE (BTLconst [31] x)) + // match: (MOVWQZX (ANDLconst [c] x)) + // result: (ANDLconst [c & 0xffff] x) for { - if v_0.Op != OpAMD64TESTL { + if v_0.Op != OpAMD64ANDLconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAE) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(c & 0xffff) + v.AddArg(x) + return true } - // match: (SETEQ (InvertFlags x)) - // result: (SETEQ x) + // match: (MOVWQZX (MOVWQZX x)) + // result: (MOVWQZX x) for { - if v_0.Op != OpAMD64InvertFlags { + if v_0.Op != OpAMD64MOVWQZX { break } x := v_0.Args[0] - v.reset(OpAMD64SETEQ) + v.reset(OpAMD64MOVWQZX) v.AddArg(x) return true } - // match: (SETEQ (FlagEQ)) - // result: (MOVLconst [1]) + // match: (MOVWQZX (MOVBQZX x)) + // result: (MOVBQZX x) for { - if v_0.Op != OpAMD64FlagEQ { + if v_0.Op != OpAMD64MOVBQZX { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + x := v_0.Args[0] + v.reset(OpAMD64MOVBQZX) + v.AddArg(x) return true } - // match: (SETEQ (FlagLT_ULT)) - // result: (MOVLconst [0]) + return false +} +func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVWload 
[off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWQZX x) for { - if v_0.Op != OpAMD64FlagLT_ULT { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVWstore { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + break + } + v.reset(OpAMD64MOVWQZX) + v.AddArg(x) return true } - // match: (SETEQ (FlagLT_UGT)) - // result: (MOVLconst [0]) + // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVWload [off1+off2] {sym} ptr mem) for { - if v_0.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64MOVWload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETEQ (FlagGT_ULT)) - // result: (MOVLconst [0]) + // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - if v_0.Op != OpAMD64FlagGT_ULT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVWload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = 
symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (SETEQ (FlagGT_UGT)) - // result: (MOVLconst [0]) + // match: (MOVWload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) for { - if v_0.Op != OpAMD64FlagGT_UGT { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpSB || !(symIsRO(sym)) { break } v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.AuxInt = int32ToAuxInt(int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))) return true } - // match: (SETEQ (TESTQ s:(Select0 blsr:(BLSRQ _)) s)) - // result: (SETEQ (Select1 blsr)) + return false +} +func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64TESTQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVWQSX { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { - continue - } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { - continue - } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - v.AddArg(v0) - return true - } - break + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true } - // match: (SETEQ (TESTL s:(Select0 blsr:(BLSRL _)) s)) - // result: (SETEQ (Select1 blsr)) + // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64TESTL { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVWQZX { break } - _ = v_0.Args[1] - v_0_0 := 
v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { - continue - } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRL || s != v_0_1 { - continue - } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - v.AddArg(v0) - return true - } - break + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true } - // match: (SETEQ (VPTEST x:(VPAND128 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETEQ (VPTEST j k)) + // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) for { - if v_0.Op != OpAMD64VPTEST { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPAND128 { + off2 := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) + return true + } + // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) + // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - v.AddArg(v0) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (SETEQ (VPTEST x:(VPAND256 j k) y)) - // 
cond: x == y && x.Uses == 2 - // result: (SETEQ (VPTEST j k)) + // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem) + // result: (MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem) for { - if v_0.Op != OpAMD64VPTEST { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPAND256 { + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int16(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - v.AddArg(v0) + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (SETEQ (VPTEST x:(VPANDD512 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETEQ (VPTEST j k)) + // match: (MOVWstore [i] {s} p x:(ROLWconst [8] w) mem) + // cond: x.Uses == 1 && buildcfg.GOAMD64 >= 3 + // result: (MOVBEWstore [i] {s} p w mem) for { - if v_0.Op != OpAMD64VPTEST { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDD512 { + i := auxIntToInt32(v.AuxInt) + s := auxToSym(v.Aux) + p := v_0 + x := v_1 + if x.Op != OpAMD64ROLWconst || auxIntToInt8(x.AuxInt) != 8 { 
break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + w := x.Args[0] + mem := v_2 + if !(x.Uses == 1 && buildcfg.GOAMD64 >= 3) { break } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - v.AddArg(v0) - return true + v.reset(OpAMD64MOVBEWstore) + v.AuxInt = int32ToAuxInt(i) + v.Aux = symToAux(s) + v.AddArg3(p, w, mem) + return true } - // match: (SETEQ (VPTEST x:(VPANDQ512 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETEQ (VPTEST j k)) + // match: (MOVWstore [off] {sym} ptr (KMOVWi mask) mem) + // result: (KMOVWstore [off] {sym} ptr mask mem) for { - if v_0.Op != OpAMD64VPTEST { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDQ512 { - break - } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64KMOVWi { break } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - v.AddArg(v0) + mask := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64KMOVWstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (SETEQ (VPTEST x:(VPANDN128 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETB (VPTEST k j)) + return false +} +func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd32(off) + // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem) for { - if v_0.Op != OpAMD64VPTEST { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDN128 { + sc := auxIntToValAndOff(v.AuxInt) + s := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + off := auxIntToInt32(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if 
!(ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - v.AddArg(v0) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(s) + v.AddArg2(ptr, mem) return true } - // match: (SETEQ (VPTEST x:(VPANDN256 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETB (VPTEST k j)) + // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) + // result: (MOVWstoreconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) for { - if v_0.Op != OpAMD64VPTEST { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDN256 { + sc := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + off := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off)) { break } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - v.AddArg(v0) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(ValAndOff(sc).addOffset32(off)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) return true } - // match: (SETEQ (VPTEST x:(VPANDND512 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETB (VPTEST k j)) + return false +} +func rewriteValueAMD64_OpAMD64MULL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULL x (MOVLconst [c])) + // result: (MULLconst [c] x) for { - if v_0.Op != OpAMD64VPTEST { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64MULLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true } - y := v_0.Args[1] - x 
:= v_0.Args[0] - if x.Op != OpAMD64VPANDND512 { + break + } + return false +} +func rewriteValueAMD64_OpAMD64MULLconst(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MULLconst [c] (MULLconst [d] x)) + // result: (MULLconst [c * d] x) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MULLconst { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64MULLconst) + v.AuxInt = int32ToAuxInt(c * d) + v.AddArg(x) + return true + } + // match: (MULLconst [ 0] _) + // result: (MOVLconst [0]) + for { + if auxIntToInt32(v.AuxInt) != 0 { break } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - v.AddArg(v0) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (SETEQ (VPTEST x:(VPANDNQ512 j k) y)) - // cond: x == y && x.Uses == 2 - // result: (SETB (VPTEST k j)) + // match: (MULLconst [ 1] x) + // result: x for { - if v_0.Op != OpAMD64VPTEST { + if auxIntToInt32(v.AuxInt) != 1 { break } - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDNQ512 { + x := v_0 + v.copyOf(x) + return true + } + // match: (MULLconst [c] x) + // cond: v.Type.Size() <= 4 && canMulStrengthReduce32(config, c) + // result: {mulStrengthReduce32(v, x, c)} + for { + c := auxIntToInt32(v.AuxInt) + x := v_0 + if !(v.Type.Size() <= 4 && canMulStrengthReduce32(config, c)) { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + v.copyOf(mulStrengthReduce32(v, x, c)) + return true + } + // match: (MULLconst [c] (MOVLconst [d])) + // result: (MOVLconst [c*d]) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - v.AddArg(v0) + d := auxIntToInt32(v_0.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(c * d) return true } 
return false } -func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MULQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETEQstore [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) - // result: (SETAEstore [off] {sym} ptr (BTL x y) mem) + // match: (MULQ x (MOVQconst [c])) + // cond: is32Bit(c) + // result: (MULQconst [int32(c)] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - if v_1_0.Op != OpAMD64SHLL { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { continue } - x := v_1_0.Args[1] - v_1_0_0 := v_1_0.Args[0] - if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 { + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c)) { continue } - y := v_1_1 - mem := v_2 - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64MULQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) return true } break } - // match: (SETEQstore [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) - // result: (SETAEstore [off] {sym} ptr (BTQ x y) mem) + return false +} +func rewriteValueAMD64_OpAMD64MULQconst(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MULQconst [c] (MULQconst [d] x)) + // cond: is32Bit(int64(c)*int64(d)) + // result: (MULQconst [c * d] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MULQconst { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for 
_i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - if v_1_0.Op != OpAMD64SHLQ { - continue - } - x := v_1_0.Args[1] - v_1_0_0 := v_1_0.Args[0] - if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 { - continue - } - y := v_1_1 - mem := v_2 - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg3(ptr, v0, mem) - return true + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(int64(c) * int64(d))) { + break } - break + v.reset(OpAMD64MULQconst) + v.AuxInt = int32ToAuxInt(c * d) + v.AddArg(x) + return true } - // match: (SETEQstore [off] {sym} ptr (TESTLconst [c] x) mem) - // cond: isPowerOfTwo(uint32(c)) - // result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem) + // match: (MULQconst [ 0] _) + // result: (MOVQconst [0]) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTLconst { + if auxIntToInt32(v.AuxInt) != 0 { break } - c := auxIntToInt32(v_1.AuxInt) - x := v_1.Args[0] - mem := v_2 - if !(isPowerOfTwo(uint32(c))) { + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (MULQconst [ 1] x) + // result: x + for { + if auxIntToInt32(v.AuxInt) != 1 { break } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) + x := v_0 + v.copyOf(x) return true } - // match: (SETEQstore [off] {sym} ptr (TESTQconst [c] x) mem) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem) + // match: (MULQconst [c] x) + // cond: canMulStrengthReduce(config, int64(c)) + // result: {mulStrengthReduce(v, x, int64(c))} for { - off := auxIntToInt32(v.AuxInt) - sym := 
auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQconst { + c := auxIntToInt32(v.AuxInt) + x := v_0 + if !(canMulStrengthReduce(config, int64(c))) { break } - c := auxIntToInt32(v_1.AuxInt) - x := v_1.Args[0] - mem := v_2 - if !(isPowerOfTwo(uint64(c))) { + v.copyOf(mulStrengthReduce(v, x, int64(c))) + return true + } + // match: (MULQconst [c] (MOVQconst [d])) + // result: (MOVQconst [int64(c)*d]) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(c) * d) return true } - // match: (SETEQstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem) + // match: (MULQconst [c] (NEGQ x)) + // cond: c != -(1<<31) + // result: (MULQconst [-c] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64NEGQ { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - if v_1_0.Op != OpAMD64MOVQconst { + x := v_0.Args[0] + if !(c != -(1 << 31)) { + break + } + v.reset(OpAMD64MULQconst) + v.AuxInt = int32ToAuxInt(-c) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MULSD(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (MULSDload x [off] {sym} ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != 
OpAMD64MOVSDload { continue } - c := auxIntToInt64(v_1_0.AuxInt) - x := v_1_1 - mem := v_2 - if !(isPowerOfTwo(uint64(c))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { continue } - v.reset(OpAMD64SETAEstore) + v.reset(OpAMD64MULSDload) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) + v.AddArg3(x, ptr, mem) return true } break } - // match: (SETEQstore [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) - // result: (SETNEstore [off] {sym} ptr (CMPLconst [0] s) mem) + return false +} +func rewriteValueAMD64_OpAMD64MULSDload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (MULSDload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (MULSDload [off1+off2] {sym} val base mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64CMPLconst || auxIntToInt32(v_1.AuxInt) != 1 { + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - s := v_1.Args[0] - if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - mem := v_2 - v.reset(OpAMD64SETNEstore) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64MULSDload) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg3(ptr, v0, mem) + v.AddArg3(val, base, mem) return true } - // match: (SETEQstore [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) - // result: (SETNEstore [off] {sym} ptr (CMPQconst 
[0] s) mem) + // match: (MULSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (MULSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64CMPQconst || auxIntToInt32(v_1.AuxInt) != 1 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - s := v_1.Args[0] - if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - mem := v_2 - v.reset(OpAMD64SETNEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64MULSDload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (SETEQstore [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) - // cond: z1==z2 - // result: (SETAEstore [off] {sym} ptr (BTQconst [63] x) mem) + // match: (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) + // result: (MULSD x (MOVQi2f y)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVQstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64MULSD) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQi2f, typ.Float64) + 
v0.AddArg(y) + v.AddArg2(x, v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MULSS(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (MULSSload x [off] {sym} ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVSSload { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETEQstore [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) - // cond: z1==z2 - // result: (SETAEstore [off] {sym} ptr (BTLconst [31] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRLconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETEQstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) - // cond: z1==z2 - // result: (SETAEstore [off] {sym} ptr 
(BTQconst [0] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETEQstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) - // cond: z1==z2 - // result: (SETAEstore [off] {sym} ptr (BTLconst [0] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETEQstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) - // cond: z1==z2 - // result: (SETAEstore [off] {sym} ptr (BTQconst [63] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := 
v_0 - if v_1.Op != OpAMD64TESTQ { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - x := z1.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETAEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETEQstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) - // cond: z1==z2 - // result: (SETAEstore [off] {sym} ptr (BTLconst [31] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - x := z1.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { continue } - v.reset(OpAMD64SETAEstore) + v.reset(OpAMD64MULSSload) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) + v.AddArg3(x, ptr, mem) return true } break } - // match: (SETEQstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETEQstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETEQstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) 
- return true - } - // match: (SETEQstore [off1] {sym} (ADDQconst [off2] base) val mem) + return false +} +func rewriteValueAMD64_OpAMD64MULSSload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (MULSSload [off1] {sym} val (ADDQconst [off2] base) mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETEQstore [off1+off2] {sym} base val mem) + // result: (MULSSload [off1+off2] {sym} val base mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SETEQstore) + v.reset(OpAMD64MULSSload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg3(val, base, mem) return true } - // match: (SETEQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MULSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETEQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MULSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] mem := v_2 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETEQstore) + v.reset(OpAMD64MULSSload) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg3(val, base, mem) return true } - // 
match: (SETEQstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) + // result: (MULSS x (MOVLi2f y)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVLstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { + y := v_2.Args[1] + if ptr != v_2.Args[0] { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64MULSS) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLi2f, typ.Float32) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + return false +} +func rewriteValueAMD64_OpAMD64NEGL(v *Value) bool { + v_0 := v.Args[0] + // match: (NEGL (NEGL x)) + // result: x for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + if v_0.Op != OpAMD64NEGL { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + x := v_0.Args[0] + v.copyOf(x) return true } - // match: (SETEQstore [off] 
{sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (NEGL s:(SUBL x y)) + // cond: s.Uses == 1 + // result: (SUBL y x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + s := v_0 + if s.Op != OpAMD64SUBL { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + y := s.Args[1] + x := s.Args[0] + if !(s.Uses == 1) { + break + } + v.reset(OpAMD64SUBL) + v.AddArg2(y, x) return true } - // match: (SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (NEGL (MOVLconst [c])) + // result: (MOVLconst [-c]) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + if v_0.Op != OpAMD64MOVLconst { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + c := auxIntToInt32(v_0.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(-c) return true } return false } -func rewriteValueAMD64_OpAMD64SETG(v *Value) bool { +func rewriteValueAMD64_OpAMD64NEGQ(v *Value) bool { v_0 := v.Args[0] - // match: (SETG (InvertFlags x)) - // result: (SETL x) + // match: (NEGQ (NEGQ x)) + // result: x for { - if v_0.Op != OpAMD64InvertFlags { + if v_0.Op != OpAMD64NEGQ { break } x := v_0.Args[0] - v.reset(OpAMD64SETL) - v.AddArg(x) + v.copyOf(x) return true } - // match: (SETG (FlagEQ)) - // result: (MOVLconst [0]) + // match: (NEGQ s:(SUBQ x y)) + // cond: s.Uses == 1 + // result: (SUBQ y x) for { - if v_0.Op != OpAMD64FlagEQ { + s := v_0 + if s.Op != OpAMD64SUBQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return 
true - } - // match: (SETG (FlagLT_ULT)) - // result: (MOVLconst [0]) - for { - if v_0.Op != OpAMD64FlagLT_ULT { + y := s.Args[1] + x := s.Args[0] + if !(s.Uses == 1) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64SUBQ) + v.AddArg2(y, x) return true } - // match: (SETG (FlagLT_UGT)) - // result: (MOVLconst [0]) + // match: (NEGQ (MOVQconst [c])) + // result: (MOVQconst [-c]) for { - if v_0.Op != OpAMD64FlagLT_UGT { + if v_0.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + c := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(-c) return true } - // match: (SETG (FlagGT_ULT)) - // result: (MOVLconst [1]) + // match: (NEGQ (ADDQconst [c] (NEGQ x))) + // cond: c != -(1<<31) + // result: (ADDQconst [-c] x) for { - if v_0.Op != OpAMD64FlagGT_ULT { + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + c := auxIntToInt32(v_0.AuxInt) + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64NEGQ { + break + } + x := v_0_0.Args[0] + if !(c != -(1 << 31)) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(-c) + v.AddArg(x) return true } - // match: (SETG (FlagGT_UGT)) - // result: (MOVLconst [1]) + return false +} +func rewriteValueAMD64_OpAMD64NOTL(v *Value) bool { + v_0 := v.Args[0] + // match: (NOTL (MOVLconst [c])) + // result: (MOVLconst [^c]) for { - if v_0.Op != OpAMD64FlagGT_UGT { + if v_0.Op != OpAMD64MOVLconst { break } + c := auxIntToInt32(v_0.AuxInt) v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + v.AuxInt = int32ToAuxInt(^c) return true } return false } -func rewriteValueAMD64_OpAMD64SETGE(v *Value) bool { +func rewriteValueAMD64_OpAMD64NOTQ(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (SETGE c:(CMPQconst [128] x)) - // cond: c.Uses == 1 - // result: (SETG (CMPQconst [127] x)) + // match: (NOTQ (MOVQconst [c])) + // result: (MOVQconst [^c]) for { - c := v_0 - if c.Op != 
OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { - break - } - x := c.Args[0] - if !(c.Uses == 1) { + if v_0.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64SETG) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) + c := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(^c) return true } - // match: (SETGE c:(CMPLconst [128] x)) - // cond: c.Uses == 1 - // result: (SETG (CMPLconst [127] x)) + return false +} +func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ORL (SHLL (MOVLconst [1]) y) x) + // result: (BTSL x y) for { - c := v_0 - if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { - break - } - x := c.Args[0] - if !(c.Uses == 1) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLL { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTSL) + v.AddArg2(x, y) + return true } - v.reset(OpAMD64SETG) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) - return true + break } - // match: (SETGE (InvertFlags x)) - // result: (SETLE x) - for { - if v_0.Op != OpAMD64InvertFlags { - break + // match: (ORL x (MOVLconst [c])) + // result: (ORLconst [c] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ORLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true } - x := v_0.Args[0] - v.reset(OpAMD64SETLE) - v.AddArg(x) - return true + break } - // match: (SETGE (FlagEQ)) - // result: (MOVLconst [1]) + // match: (ORL x x) + // result: x for { - if v_0.Op != OpAMD64FlagEQ { + x := v_0 + if x != v_1 { break } - v.reset(OpAMD64MOVLconst) - 
v.AuxInt = int32ToAuxInt(1) + v.copyOf(x) return true } - // match: (SETGE (FlagLT_ULT)) - // result: (MOVLconst [0]) + // match: (ORL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (ORLload x [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64FlagLT_ULT { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVLload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(OpAMD64ORLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64ORLconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ORLconst [c] (ORLconst [d] x)) + // result: (ORLconst [c | d] x) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64ORLconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64ORLconst) + v.AuxInt = int32ToAuxInt(c | d) + v.AddArg(x) return true } - // match: (SETGE (FlagLT_UGT)) - // result: (MOVLconst [0]) + // match: (ORLconst [0] x) + // result: x for { - if v_0.Op != OpAMD64FlagLT_UGT { + if auxIntToInt32(v.AuxInt) != 0 { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + x := v_0 + v.copyOf(x) return true } - // match: (SETGE (FlagGT_ULT)) - // result: (MOVLconst [1]) + // match: (ORLconst [-1] _) + // result: (MOVLconst [-1]) for { - if v_0.Op != OpAMD64FlagGT_ULT { + if auxIntToInt32(v.AuxInt) != -1 { break } v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + v.AuxInt = int32ToAuxInt(-1) return true } - // match: (SETGE (FlagGT_UGT)) - // result: (MOVLconst [1]) + // match: (ORLconst [c] (MOVLconst [d])) + // result: (MOVLconst [c|d]) for { - if v_0.Op != OpAMD64FlagGT_UGT { + c := auxIntToInt32(v.AuxInt) + 
if v_0.Op != OpAMD64MOVLconst { break } + d := auxIntToInt32(v_0.AuxInt) v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + v.AuxInt = int32ToAuxInt(c | d) return true } return false } -func rewriteValueAMD64_OpAMD64SETGEstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64ORLconstmodify(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETGEstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETLEstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETLEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETGEstore [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETGEstore [off1+off2] {sym} base val mem) + // match: (ORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - off1 := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym := auxToSym(v.Aux) if v_0.Op != OpAMD64ADDQconst { break } off2 := auxIntToInt32(v_0.AuxInt) base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2)) { break } - v.reset(OpAMD64SETGEstore) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64ORLconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) return true } - // match: (SETGEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETGEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // 
match: (ORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (ORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - off1 := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym1 := auxToSym(v.Aux) if v_0.Op != OpAMD64LEAQ { break @@ -24232,135 +28638,98 @@ func rewriteValueAMD64_OpAMD64SETGEstore(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETGEstore) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64ORLconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) return true } - // match: (SETGEstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + return false +} +func rewriteValueAMD64_OpAMD64ORLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (ORLload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (ORLload [off1+off2] {sym} val base mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETGEstore [off] {sym} ptr (FlagLT_ULT) 
mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { + if !(is32Bit(int64(off1) + int64(off2))) { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64ORLload) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.AddArg3(val, base, mem) return true } - // match: (SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (ORLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (ORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64ORLload) + v.AuxInt = int32ToAuxInt(off1 + off2) 
+ v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) + // result: ( ORL x (MOVLf2i y)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64ORL) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32) + v0.AddArg(y) + v.AddArg2(x, v0) return true } return false } -func rewriteValueAMD64_OpAMD64SETGstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETGstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETLstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETLstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETGstore [off1] {sym} (ADDQconst [off2] base) val mem) + // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETGstore [off1+off2] {sym} base val mem) + // result: (ORLmodify [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -24374,15 +28743,15 @@ func rewriteValueAMD64_OpAMD64SETGstore(v *Value) bool { 
if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SETGstore) + v.reset(OpAMD64ORLmodify) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) v.AddArg3(base, val, mem) return true } - // match: (SETGstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (ORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETGstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (ORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -24397,323 +28766,322 @@ func rewriteValueAMD64_OpAMD64SETGstore(v *Value) bool { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETGstore) + v.reset(OpAMD64ORLmodify) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg3(base, val, mem) return true } - // match: (SETGstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + return false +} +func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ORQ (SHLQ (MOVQconst [1]) y) x) + // result: (BTSQ x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLQ { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTSQ) + v.AddArg2(x, y) + return true } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) - return true + break } - // match: (SETGstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr 
(MOVLconst [0]) mem) + // match: (ORQ (MOVQconst [c]) x) + // cond: isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31 + // result: (BTSQconst [int8(log64u(uint64(c)))] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0.AuxInt) + x := v_1 + if !(isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31) { + continue + } + v.reset(OpAMD64BTSQconst) + v.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v.AddArg(x) + return true } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) - return true + break } - // match: (SETGstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (ORQ x (MOVQconst [c])) + // cond: is32Bit(c) + // result: (ORQconst [int32(c)] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c)) { + continue + } + v.reset(OpAMD64ORQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) - return true + break } - // match: (SETGstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (ORQ x (MOVLconst [c])) + // result: (ORQconst [c] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { 
- break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ORQconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true + break } - // match: (SETGstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (ORQ (SHRQ lo bits) (SHLQ hi (NEGQ bits))) + // result: (SHRDQ lo hi bits) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRQ { + continue + } + bits := v_0.Args[1] + lo := v_0.Args[0] + if v_1.Op != OpAMD64SHLQ { + continue + } + _ = v_1.Args[1] + hi := v_1.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { + continue + } + v.reset(OpAMD64SHRDQ) + v.AddArg3(lo, hi, bits) + return true } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64SETL(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (SETL c:(CMPQconst [128] x)) - // cond: c.Uses == 1 - // result: (SETLE (CMPQconst [127] x)) + // match: (ORQ (SHLQ lo bits) (SHRQ hi (NEGQ bits))) + // result: (SHLDQ lo hi bits) for { - c := v_0 - if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { - break - } - x := c.Args[0] - if !(c.Uses == 1) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLQ { + continue + } + bits := v_0.Args[1] + lo 
:= v_0.Args[0] + if v_1.Op != OpAMD64SHRQ { + continue + } + _ = v_1.Args[1] + hi := v_1.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { + continue + } + v.reset(OpAMD64SHLDQ) + v.AddArg3(lo, hi, bits) + return true } - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) - return true - } - // match: (SETL c:(CMPLconst [128] x)) - // cond: c.Uses == 1 - // result: (SETLE (CMPLconst [127] x)) - for { - c := v_0 - if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { - break - } - x := c.Args[0] - if !(c.Uses == 1) { - break - } - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(x) - v.AddArg(v0) - return true - } - // match: (SETL (InvertFlags x)) - // result: (SETG x) - for { - if v_0.Op != OpAMD64InvertFlags { - break - } - x := v_0.Args[0] - v.reset(OpAMD64SETG) - v.AddArg(x) - return true + break } - // match: (SETL (FlagEQ)) - // result: (MOVLconst [0]) + // match: (ORQ (SHRXQ lo bits) (SHLXQ hi (NEGQ bits))) + // result: (SHRDQ lo hi bits) for { - if v_0.Op != OpAMD64FlagEQ { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHRXQ { + continue + } + bits := v_0.Args[1] + lo := v_0.Args[0] + if v_1.Op != OpAMD64SHLXQ { + continue + } + _ = v_1.Args[1] + hi := v_1.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { + continue + } + v.reset(OpAMD64SHRDQ) + v.AddArg3(lo, hi, bits) + return true } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true + break } - // match: (SETL (FlagLT_ULT)) - // result: (MOVLconst [1]) + // match: (ORQ (SHLXQ lo bits) (SHRXQ hi (NEGQ bits))) + // result: (SHLDQ lo hi bits) for { - if v_0.Op != OpAMD64FlagLT_ULT { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLXQ { + continue 
+ } + bits := v_0.Args[1] + lo := v_0.Args[0] + if v_1.Op != OpAMD64SHRXQ { + continue + } + _ = v_1.Args[1] + hi := v_1.Args[0] + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpAMD64NEGQ || bits != v_1_1.Args[0] { + continue + } + v.reset(OpAMD64SHLDQ) + v.AddArg3(lo, hi, bits) + return true } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true + break } - // match: (SETL (FlagLT_UGT)) - // result: (MOVLconst [1]) + // match: (ORQ (MOVQconst [c]) (MOVQconst [d])) + // result: (MOVQconst [c|d]) for { - if v_0.Op != OpAMD64FlagLT_UGT { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpAMD64MOVQconst { + continue + } + d := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(c | d) + return true } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true + break } - // match: (SETL (FlagGT_ULT)) - // result: (MOVLconst [0]) + // match: (ORQ x x) + // result: x for { - if v_0.Op != OpAMD64FlagGT_ULT { + x := v_0 + if x != v_1 { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.copyOf(x) return true } - // match: (SETL (FlagGT_UGT)) - // result: (MOVLconst [0]) + // match: (ORQ x l:(MOVQload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (ORQload x [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64FlagGT_UGT { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVQload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(OpAMD64ORQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true + break } return false } -func 
rewriteValueAMD64_OpAMD64SETLE(v *Value) bool { +func rewriteValueAMD64_OpAMD64ORQconst(v *Value) bool { v_0 := v.Args[0] - // match: (SETLE (InvertFlags x)) - // result: (SETGE x) + // match: (ORQconst [c] (ORQconst [d] x)) + // result: (ORQconst [c | d] x) for { - if v_0.Op != OpAMD64InvertFlags { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64ORQconst { break } + d := auxIntToInt32(v_0.AuxInt) x := v_0.Args[0] - v.reset(OpAMD64SETGE) + v.reset(OpAMD64ORQconst) + v.AuxInt = int32ToAuxInt(c | d) v.AddArg(x) return true } - // match: (SETLE (FlagEQ)) - // result: (MOVLconst [1]) - for { - if v_0.Op != OpAMD64FlagEQ { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - // match: (SETLE (FlagLT_ULT)) - // result: (MOVLconst [1]) - for { - if v_0.Op != OpAMD64FlagLT_ULT { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) - return true - } - // match: (SETLE (FlagLT_UGT)) - // result: (MOVLconst [1]) + // match: (ORQconst [0] x) + // result: x for { - if v_0.Op != OpAMD64FlagLT_UGT { + if auxIntToInt32(v.AuxInt) != 0 { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + x := v_0 + v.copyOf(x) return true } - // match: (SETLE (FlagGT_ULT)) - // result: (MOVLconst [0]) + // match: (ORQconst [-1] _) + // result: (MOVQconst [-1]) for { - if v_0.Op != OpAMD64FlagGT_ULT { + if auxIntToInt32(v.AuxInt) != -1 { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(-1) return true } - // match: (SETLE (FlagGT_UGT)) - // result: (MOVLconst [0]) + // match: (ORQconst [c] (MOVQconst [d])) + // result: (MOVQconst [int64(c)|d]) for { - if v_0.Op != OpAMD64FlagGT_UGT { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(c) | d) return true } return false } -func 
rewriteValueAMD64_OpAMD64SETLEstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64ORQconstmodify(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETLEstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETGEstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETGEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETLEstore [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETLEstore [off1+off2] {sym} base val mem) + // match: (ORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (ORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - off1 := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym := auxToSym(v.Aux) if v_0.Op != OpAMD64ADDQconst { break } off2 := auxIntToInt32(v_0.AuxInt) base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2)) { break } - v.reset(OpAMD64SETLEstore) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64ORQconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) return true } - // match: (SETLEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETLEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (ORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (ORQconstmodify 
[ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - off1 := auxIntToInt32(v.AuxInt) + valoff1 := auxIntToValAndOff(v.AuxInt) sym1 := auxToSym(v.Aux) if v_0.Op != OpAMD64LEAQ { break @@ -24721,135 +29089,98 @@ func rewriteValueAMD64_OpAMD64SETLEstore(v *Value) bool { off2 := auxIntToInt32(v_0.AuxInt) sym2 := auxToSym(v_0.Aux) base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETLEstore) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64ORQconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) return true } - // match: (SETLEstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + return false +} +func rewriteValueAMD64_OpAMD64ORQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (ORQload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (ORQload [off1+off2] {sym} val base mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != 
OpAMD64FlagLT_ULT { + if !(is32Bit(int64(off1) + int64(off2))) { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64ORQload) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + v.AddArg3(val, base, mem) return true } - // match: (SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (ORQload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (ORQload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64ORQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: 
(MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: ( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) + // result: ( ORQ x (MOVQf2i y)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVSDstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64ORQ) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQf2i, typ.UInt64) + v0.AddArg(y) + v.AddArg2(x, v0) return true } return false } -func rewriteValueAMD64_OpAMD64SETLstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64ORQmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETLstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETGstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETGstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETLstore [off1] {sym} (ADDQconst [off2] base) val mem) + // match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETLstore [off1+off2] {sym} base val mem) + // result: (ORQmodify [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -24863,15 +29194,15 @@ func rewriteValueAMD64_OpAMD64SETLstore(v *Value) bool { if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SETLstore) + v.reset(OpAMD64ORQmodify) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = 
symToAux(sym) v.AddArg3(base, val, mem) return true } - // match: (SETLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (ORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SETLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (ORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -24886,1228 +29217,623 @@ func rewriteValueAMD64_OpAMD64SETLstore(v *Value) bool { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SETLstore) + v.reset(OpAMD64ORQmodify) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg3(base, val, mem) return true } - // match: (SETLstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + return false +} +func rewriteValueAMD64_OpAMD64ROLB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ROLB x (NEGQ y)) + // result: (RORB x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + y := v_1.Args[0] + v.reset(OpAMD64RORB) + v.AddArg2(x, y) return true } - // match: (SETLstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (ROLB x (NEGL y)) + // result: (RORB x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, 
typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + y := v_1.Args[0] + v.reset(OpAMD64RORB) + v.AddArg2(x, y) return true } - // match: (SETLstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + // match: (ROLB x (MOVQconst [c])) + // result: (ROLBconst [int8(c&7) ] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLBconst) + v.AuxInt = int8ToAuxInt(int8(c & 7)) + v.AddArg(x) return true } - // match: (SETLstore [off] {sym} ptr (FlagGT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + // match: (ROLB x (MOVLconst [c])) + // result: (ROLBconst [int8(c&7) ] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ROLBconst) + v.AuxInt = int8ToAuxInt(int8(c & 7)) + v.AddArg(x) return true } - // match: (SETLstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { + return false +} +func rewriteValueAMD64_OpAMD64ROLBconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ROLBconst x [0]) + // result: x + for { + if auxIntToInt8(v.AuxInt) != 0 { break } - mem := v_2 - 
v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) + x := v_0 + v.copyOf(x) return true } return false } -func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool { +func rewriteValueAMD64_OpAMD64ROLL(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (SETNE (TESTBconst [1] x)) - // result: (ANDLconst [1] x) + // match: (ROLL x (NEGQ y)) + // result: (RORL x y) for { - if v_0.Op != OpAMD64TESTBconst || auxIntToInt8(v_0.AuxInt) != 1 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(1) - v.AddArg(x) + y := v_1.Args[0] + v.reset(OpAMD64RORL) + v.AddArg2(x, y) return true } - // match: (SETNE (TESTWconst [1] x)) - // result: (ANDLconst [1] x) + // match: (ROLL x (NEGL y)) + // result: (RORL x y) for { - if v_0.Op != OpAMD64TESTWconst || auxIntToInt16(v_0.AuxInt) != 1 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(1) - v.AddArg(x) + y := v_1.Args[0] + v.reset(OpAMD64RORL) + v.AddArg2(x, y) return true } - // match: (SETNE (TESTL (SHLL (MOVLconst [1]) x) y)) - // result: (SETB (BTL x y)) + // match: (ROLL x (MOVQconst [c])) + // result: (ROLLconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLL { - continue - } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { - continue - } - y := v_0_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } - break + c := auxIntToInt64(v_1.AuxInt) + 
v.reset(OpAMD64ROLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true } - // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y)) - // result: (SETB (BTQ x y)) + // match: (ROLL x (MOVLconst [c])) + // result: (ROLLconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLQ { - continue - } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { - continue - } - y := v_0_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } - break + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ROLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true } - // match: (SETNE (TESTLconst [c] x)) - // cond: isPowerOfTwo(uint32(c)) - // result: (SETB (BTLconst [int8(log32u(uint32(c)))] x)) + return false +} +func rewriteValueAMD64_OpAMD64ROLLconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ROLLconst x [0]) + // result: x for { - if v_0.Op != OpAMD64TESTLconst { - break - } - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(isPowerOfTwo(uint32(c))) { + if auxIntToInt8(v.AuxInt) != 0 { break } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg(v0) + x := v_0 + v.copyOf(x) return true } - // match: (SETNE (TESTQconst [c] x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETB (BTQconst [int8(log32u(uint32(c)))] x)) + return false +} +func rewriteValueAMD64_OpAMD64ROLQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ROLQ x (NEGQ y)) + // result: (RORQ x y) for { - if v_0.Op != OpAMD64TESTQconst { - break - } - c := auxIntToInt32(v_0.AuxInt) - x := 
v_0.Args[0] - if !(isPowerOfTwo(uint64(c))) { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg(v0) + y := v_1.Args[0] + v.reset(OpAMD64RORQ) + v.AddArg2(x, y) return true } - // match: (SETNE (TESTQ (MOVQconst [c]) x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETB (BTQconst [int8(log64u(uint64(c)))] x)) + // match: (ROLQ x (NEGL y)) + // result: (RORQ x y) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64MOVQconst { - continue - } - c := auxIntToInt64(v_0_0.AuxInt) - x := v_0_1 - if !(isPowerOfTwo(uint64(c))) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + y := v_1.Args[0] + v.reset(OpAMD64RORQ) + v.AddArg2(x, y) + return true } - // match: (SETNE (CMPLconst [1] s:(ANDLconst [1] _))) - // result: (SETEQ (CMPLconst [0] s)) + // match: (ROLQ x (MOVQconst [c])) + // result: (ROLQconst [int8(c&63)] x) for { - if v_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_0.AuxInt) != 1 { - break - } - s := v_0.Args[0] - if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg(v0) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) return true } - // match: (SETNE (CMPQconst [1] s:(ANDQconst [1] _))) - // result: (SETEQ (CMPQconst [0] s)) + // match: (ROLQ x (MOVLconst [c])) + // result: (ROLQconst [int8(c&63)] 
x) for { - if v_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_0.AuxInt) != 1 { - break - } - s := v_0.Args[0] - if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg(v0) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ROLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) return true } - // match: (SETNE (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) - // cond: z1==z2 - // result: (SETB (BTQconst [63] x)) + return false +} +func rewriteValueAMD64_OpAMD64ROLQconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ROLQconst x [0]) + // result: x for { - if v_0.Op != OpAMD64TESTQ { + if auxIntToInt8(v.AuxInt) != 0 { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + x := v_0 + v.copyOf(x) + return true } - // match: (SETNE (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) - // cond: z1==z2 - // result: (SETB (BTQconst [31] x)) + return false +} +func rewriteValueAMD64_OpAMD64ROLW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ROLW x (NEGQ y)) + // result: (RORW x y) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != 
OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + y := v_1.Args[0] + v.reset(OpAMD64RORW) + v.AddArg2(x, y) + return true } - // match: (SETNE (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) - // cond: z1==z2 - // result: (SETB (BTQconst [0] x)) + // match: (ROLW x (NEGL y)) + // result: (RORW x y) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + y := v_1.Args[0] + v.reset(OpAMD64RORW) + v.AddArg2(x, y) + return true } - // match: (SETNE (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) - // cond: z1==z2 - // result: (SETB (BTLconst [0] x)) + // match: (ROLW x (MOVQconst [c])) + // result: (ROLWconst [int8(c&15)] x) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) 
!= 31 { - continue - } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLWconst) + v.AuxInt = int8ToAuxInt(int8(c & 15)) + v.AddArg(x) + return true } - // match: (SETNE (TESTQ z1:(SHRQconst [63] x) z2)) - // cond: z1==z2 - // result: (SETB (BTQconst [63] x)) + // match: (ROLW x (MOVLconst [c])) + // result: (ROLWconst [int8(c&15)] x) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg(v0) - return true - } - break + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ROLWconst) + v.AuxInt = int8ToAuxInt(int8(c & 15)) + v.AddArg(x) + return true } - // match: (SETNE (TESTL z1:(SHRLconst [31] x) z2)) - // cond: z1==z2 - // result: (SETB (BTLconst [31] x)) + return false +} +func rewriteValueAMD64_OpAMD64ROLWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ROLWconst x [0]) + // result: x for { - if v_0.Op != OpAMD64TESTL { + if auxIntToInt8(v.AuxInt) != 0 { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - 
v0.AddArg(x) - v.AddArg(v0) - return true - } - break + x := v_0 + v.copyOf(x) + return true } - // match: (SETNE (InvertFlags x)) - // result: (SETNE x) + return false +} +func rewriteValueAMD64_OpAMD64RORB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (RORB x (NEGQ y)) + // result: (ROLB x y) for { - if v_0.Op != OpAMD64InvertFlags { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - x := v_0.Args[0] - v.reset(OpAMD64SETNE) - v.AddArg(x) + y := v_1.Args[0] + v.reset(OpAMD64ROLB) + v.AddArg2(x, y) return true } - // match: (SETNE (FlagEQ)) - // result: (MOVLconst [0]) + // match: (RORB x (NEGL y)) + // result: (ROLB x y) for { - if v_0.Op != OpAMD64FlagEQ { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + y := v_1.Args[0] + v.reset(OpAMD64ROLB) + v.AddArg2(x, y) return true } - // match: (SETNE (FlagLT_ULT)) - // result: (MOVLconst [1]) + // match: (RORB x (MOVQconst [c])) + // result: (ROLBconst [int8((-c)&7) ] x) for { - if v_0.Op != OpAMD64FlagLT_ULT { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLBconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 7)) + v.AddArg(x) return true } - // match: (SETNE (FlagLT_UGT)) - // result: (MOVLconst [1]) + // match: (RORB x (MOVLconst [c])) + // result: (ROLBconst [int8((-c)&7) ] x) for { - if v_0.Op != OpAMD64FlagLT_UGT { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ROLBconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 7)) + v.AddArg(x) return true } - // match: (SETNE (FlagGT_ULT)) - // result: (MOVLconst [1]) + return false +} +func rewriteValueAMD64_OpAMD64RORL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (RORL x (NEGQ y)) + // result: (ROLL x y) for { - if v_0.Op != OpAMD64FlagGT_ULT { + x := v_0 + if 
v_1.Op != OpAMD64NEGQ { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + y := v_1.Args[0] + v.reset(OpAMD64ROLL) + v.AddArg2(x, y) return true } - // match: (SETNE (FlagGT_UGT)) - // result: (MOVLconst [1]) + // match: (RORL x (NEGL y)) + // result: (ROLL x y) for { - if v_0.Op != OpAMD64FlagGT_UGT { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(1) + y := v_1.Args[0] + v.reset(OpAMD64ROLL) + v.AddArg2(x, y) return true } - // match: (SETNE (TESTQ s:(Select0 blsr:(BLSRQ _)) s)) - // result: (SETNE (Select1 blsr)) + // match: (RORL x (MOVQconst [c])) + // result: (ROLLconst [int8((-c)&31)] x) for { - if v_0.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { - continue - } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { - continue - } - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - v.AddArg(v0) - return true - } - break + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLLconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 31)) + v.AddArg(x) + return true } - // match: (SETNE (TESTL s:(Select0 blsr:(BLSRL _)) s)) - // result: (SETNE (Select1 blsr)) + // match: (RORL x (MOVLconst [c])) + // result: (ROLLconst [int8((-c)&31)] x) for { - if v_0.Op != OpAMD64TESTL { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { - continue - } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRL || s != v_0_1 { - continue - } - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - v.AddArg(v0) - return true - } - break + c := auxIntToInt32(v_1.AuxInt) + 
v.reset(OpAMD64ROLLconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 31)) + v.AddArg(x) + return true } return false } -func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64RORQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SETNEstore [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) - // result: (SETBstore [off] {sym} ptr (BTL x y) mem) + // match: (RORQ x (NEGQ y)) + // result: (ROLQ x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - if v_1_0.Op != OpAMD64SHLL { - continue - } - x := v_1_0.Args[1] - v_1_0_0 := v_1_0.Args[0] - if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 { - continue - } - y := v_1_1 - mem := v_2 - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg3(ptr, v0, mem) - return true - } - break + y := v_1.Args[0] + v.reset(OpAMD64ROLQ) + v.AddArg2(x, y) + return true } - // match: (SETNEstore [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) - // result: (SETBstore [off] {sym} ptr (BTQ x y) mem) + // match: (RORQ x (NEGL y)) + // result: (ROLQ x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - if v_1_0.Op != OpAMD64SHLQ { - continue - } - x := v_1_0.Args[1] - v_1_0_0 := v_1_0.Args[0] - if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 { - continue - } - y := v_1_1 - mem := v_2 - 
v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg3(ptr, v0, mem) - return true + y := v_1.Args[0] + v.reset(OpAMD64ROLQ) + v.AddArg2(x, y) + return true + } + // match: (RORQ x (MOVQconst [c])) + // result: (ROLQconst [int8((-c)&63)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break } - break + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLQconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 63)) + v.AddArg(x) + return true } - // match: (SETNEstore [off] {sym} ptr (TESTLconst [c] x) mem) - // cond: isPowerOfTwo(uint32(c)) - // result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem) + // match: (RORQ x (MOVLconst [c])) + // result: (ROLQconst [int8((-c)&63)] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTLconst { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } c := auxIntToInt32(v_1.AuxInt) - x := v_1.Args[0] - mem := v_2 - if !(isPowerOfTwo(uint32(c))) { + v.reset(OpAMD64ROLQconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 63)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64RORW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (RORW x (NEGQ y)) + // result: (ROLW x y) + for { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) + y := v_1.Args[0] + v.reset(OpAMD64ROLW) + v.AddArg2(x, y) return true } - // match: (SETNEstore [off] {sym} ptr (TESTQconst [c] x) mem) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETBstore [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem) + // match: (RORW x (NEGL y)) + // result: (ROLW x y) for { - off := auxIntToInt32(v.AuxInt) - 
sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQconst { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - c := auxIntToInt32(v_1.AuxInt) - x := v_1.Args[0] - mem := v_2 - if !(isPowerOfTwo(uint64(c))) { + y := v_1.Args[0] + v.reset(OpAMD64ROLW) + v.AddArg2(x, y) + return true + } + // match: (RORW x (MOVQconst [c])) + // result: (ROLWconst [int8((-c)&15)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64ROLWconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 15)) + v.AddArg(x) return true } - // match: (SETNEstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) - // cond: isPowerOfTwo(uint64(c)) - // result: (SETBstore [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem) + // match: (RORW x (MOVLconst [c])) + // result: (ROLWconst [int8((-c)&15)] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - if v_1_0.Op != OpAMD64MOVQconst { - continue - } - c := auxIntToInt64(v_1_0.AuxInt) - x := v_1_1 - mem := v_2 - if !(isPowerOfTwo(uint64(c))) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64ROLWconst) + v.AuxInt = int8ToAuxInt(int8((-c) & 15)) + v.AddArg(x) + return true } - // match: (SETNEstore [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] 
_)) mem) - // result: (SETEQstore [off] {sym} ptr (CMPLconst [0] s) mem) + return false +} +func rewriteValueAMD64_OpAMD64SARB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SARB x (MOVQconst [c])) + // result: (SARBconst [int8(min(int64(c)&31,7))] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64CMPLconst || auxIntToInt32(v_1.AuxInt) != 1 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - s := v_1.Args[0] - if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SARBconst) + v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 7))) + v.AddArg(x) + return true + } + // match: (SARB x (MOVLconst [c])) + // result: (SARBconst [int8(min(int64(c)&31,7))] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - mem := v_2 - v.reset(OpAMD64SETEQstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg3(ptr, v0, mem) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SARBconst) + v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 7))) + v.AddArg(x) return true } - // match: (SETNEstore [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) - // result: (SETEQstore [off] {sym} ptr (CMPQconst [0] s) mem) + return false +} +func rewriteValueAMD64_OpAMD64SARBconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SARBconst x [0]) + // result: x for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64CMPQconst || auxIntToInt32(v_1.AuxInt) != 1 { + if auxIntToInt8(v.AuxInt) != 0 { break } - s := v_1.Args[0] - if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { + x := v_0 + v.copyOf(x) + return true + } + // match: (SARBconst [c] (MOVQconst [d])) + // result: (MOVQconst [int64(int8(d))>>uint64(c)]) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break 
} - mem := v_2 - v.reset(OpAMD64SETEQstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(s) - v.AddArg3(ptr, v0, mem) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(int8(d)) >> uint64(c)) return true } - // match: (SETNEstore [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) - // cond: z1==z2 - // result: (SETBstore [off] {sym} ptr (BTQconst [63] x) mem) + return false +} +func rewriteValueAMD64_OpAMD64SARL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SARL x (MOVQconst [c])) + // result: (SARLconst [int8(c&31)] x) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SARLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true + } + // match: (SARL x (MOVLconst [c])) + // result: (SARLconst [int8(c&31)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + break } - break + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SARLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) + return true } - // match: (SETNEstore [off] {sym} ptr 
(TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) - // cond: z1==z2 - // result: (SETBstore [off] {sym} ptr (BTLconst [31] x) mem) + // match: (SARL x (ADDQconst [c] y)) + // cond: c & 31 == 0 + // result: (SARL x y) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRLconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { + break } - break + v.reset(OpAMD64SARL) + v.AddArg2(x, y) + return true } - // match: (SETNEstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) - // cond: z1==z2 - // result: (SETBstore [off] {sym} ptr (BTQconst [0] x) mem) + // match: (SARL x (NEGQ (ADDQconst [c] y))) + // cond: c & 31 == 0 + // result: (SARL x (NEGQ y)) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == 
z2) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETNEstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) - // cond: z1==z2 - // result: (SETBstore [off] {sym} ptr (BTLconst [0] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { - continue - } - x := z1_0.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETNEstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) - // cond: z1==z2 - // result: (SETBstore [off] {sym} ptr (BTQconst [63] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTQ { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { - continue - } - x := z1.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) 
- v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETNEstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) - // cond: z1==z2 - // result: (SETBstore [off] {sym} ptr (BTLconst [31] x) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64TESTL { - break - } - _ = v_1.Args[1] - v_1_0 := v_1.Args[0] - v_1_1 := v_1.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { - z1 := v_1_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { - continue - } - x := z1.Args[0] - z2 := v_1_1 - mem := v_2 - if !(z1 == z2) { - continue - } - v.reset(OpAMD64SETBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - v.AddArg3(ptr, v0, mem) - return true - } - break - } - // match: (SETNEstore [off] {sym} ptr (InvertFlags x) mem) - // result: (SETNEstore [off] {sym} ptr x mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64InvertFlags { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64SETNEstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) - return true - } - // match: (SETNEstore [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SETNEstore [off1+off2] {sym} base val mem) - for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break - } - v.reset(OpAMD64SETNEstore) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) - return true - } - // match: (SETNEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // 
result: (SETNEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64SETNEstore) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) - return true - } - // match: (SETNEstore [off] {sym} ptr (FlagEQ) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagEQ { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(0) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_ULT { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagLT_UGT { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) - 
// result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_ULT { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - // match: (SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) - // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64FlagGT_UGT { - break - } - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) - v0.AuxInt = int32ToAuxInt(1) - v.AddArg3(ptr, v0, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SHLL x (MOVQconst [c])) - // result: (SHLLconst [int8(c&31)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) - return true - } - // match: (SHLL x (MOVLconst [c])) - // result: (SHLLconst [int8(c&31)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) - return true - } - // match: (SHLL x (ADDQconst [c] y)) - // cond: c & 31 == 0 - // result: (SHLL x y) - for { - x := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 0) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) - return true - } - // match: (SHLL x (NEGQ (ADDQconst [c] y))) - // cond: c & 31 == 0 - // result: (SHLL x (NEGQ y)) - for { - x := v_0 - if v_1.Op != OpAMD64NEGQ 
{ - break - } - t := v_1.Type + t := v_1.Type v_1_0 := v_1.Args[0] if v_1_0.Op != OpAMD64ADDQconst { break @@ -26117,15 +29843,15 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 0) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLL x (ANDQconst [c] y)) + // match: (SARL x (ANDQconst [c] y)) // cond: c & 31 == 31 - // result: (SHLL x y) + // result: (SARL x y) for { x := v_0 if v_1.Op != OpAMD64ANDQconst { @@ -26136,13 +29862,13 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 31) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v.AddArg2(x, y) return true } - // match: (SHLL x (NEGQ (ANDQconst [c] y))) + // match: (SARL x (NEGQ (ANDQconst [c] y))) // cond: c & 31 == 31 - // result: (SHLL x (NEGQ y)) + // result: (SARL x (NEGQ y)) for { x := v_0 if v_1.Op != OpAMD64NEGQ { @@ -26158,15 +29884,15 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 31) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLL x (ADDLconst [c] y)) + // match: (SARL x (ADDLconst [c] y)) // cond: c & 31 == 0 - // result: (SHLL x y) + // result: (SARL x y) for { x := v_0 if v_1.Op != OpAMD64ADDLconst { @@ -26177,13 +29903,13 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 0) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v.AddArg2(x, y) return true } - // match: (SHLL x (NEGL (ADDLconst [c] y))) + // match: (SARL x (NEGL (ADDLconst [c] y))) // cond: c & 31 == 0 - // result: (SHLL x (NEGL y)) + // result: (SARL x (NEGL y)) for { x := v_0 if v_1.Op != OpAMD64NEGL { @@ -26199,15 +29925,15 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 0) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - 
// match: (SHLL x (ANDLconst [c] y)) + // match: (SARL x (ANDLconst [c] y)) // cond: c & 31 == 31 - // result: (SHLL x y) + // result: (SARL x y) for { x := v_0 if v_1.Op != OpAMD64ANDLconst { @@ -26218,13 +29944,13 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 31) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v.AddArg2(x, y) return true } - // match: (SHLL x (NEGL (ANDLconst [c] y))) + // match: (SARL x (NEGL (ANDLconst [c] y))) // cond: c & 31 == 31 - // result: (SHLL x (NEGL y)) + // result: (SARL x (NEGL y)) for { x := v_0 if v_1.Op != OpAMD64NEGL { @@ -26240,15 +29966,15 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(c&31 == 31) { break } - v.reset(OpAMD64SHLL) + v.reset(OpAMD64SARL) v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLL l:(MOVLload [off] {sym} ptr mem) x) + // match: (SARL l:(MOVLload [off] {sym} ptr mem) x) // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHLXLload [off] {sym} ptr x mem) + // result: (SARXLload [off] {sym} ptr x mem) for { l := v_0 if l.Op != OpAMD64MOVLload { @@ -26262,7 +29988,7 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLXLload) + v.reset(OpAMD64SARXLload) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(ptr, x, mem) @@ -26270,9 +29996,9 @@ func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64SARLconst(v *Value) bool { v_0 := v.Args[0] - // match: (SHLLconst x [0]) + // match: (SARLconst x [0]) // result: x for { if auxIntToInt8(v.AuxInt) != 0 { @@ -26282,81 +30008,53 @@ func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool { v.copyOf(x) return true } - // match: (SHLLconst [1] x) - // result: (ADDL x x) - for { - if auxIntToInt8(v.AuxInt) != 1 { - break - } - x := v_0 - 
v.reset(OpAMD64ADDL) - v.AddArg2(x, x) - return true - } - // match: (SHLLconst [c] (ADDL x x)) - // cond: c < 31 - // result: (SHLLconst [c+1] x) + // match: (SARLconst [c] (MOVQconst [d])) + // result: (MOVQconst [int64(int32(d))>>uint64(c)]) for { c := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64ADDL { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] || !(c < 31) { - break - } - v.reset(OpAMD64SHLLconst) - v.AuxInt = int8ToAuxInt(c + 1) - v.AddArg(x) - return true - } - // match: (SHLLconst [d] (MOVLconst [c])) - // result: (MOVLconst [c << uint64(d)]) - for { - d := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64MOVQconst { break } - c := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(c << uint64(d)) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(int32(d)) >> uint64(c)) return true } return false } -func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64SARQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (SHLQ x (MOVQconst [c])) - // result: (SHLQconst [int8(c&63)] x) + // match: (SARQ x (MOVQconst [c])) + // result: (SARQconst [int8(c&63)] x) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SHLQconst) + v.reset(OpAMD64SARQconst) v.AuxInt = int8ToAuxInt(int8(c & 63)) v.AddArg(x) return true } - // match: (SHLQ x (MOVLconst [c])) - // result: (SHLQconst [int8(c&63)] x) + // match: (SARQ x (MOVLconst [c])) + // result: (SARQconst [int8(c&63)] x) for { x := v_0 if v_1.Op != OpAMD64MOVLconst { break } c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SHLQconst) + v.reset(OpAMD64SARQconst) v.AuxInt = int8ToAuxInt(int8(c & 63)) v.AddArg(x) return true } - // match: (SHLQ x (ADDQconst [c] y)) + // match: (SARQ x (ADDQconst [c] y)) // cond: c & 63 == 0 - // result: (SHLQ x y) + // result: (SARQ x y) for { x := v_0 if v_1.Op != OpAMD64ADDQconst { 
@@ -26367,13 +30065,13 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 0) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v.AddArg2(x, y) return true } - // match: (SHLQ x (NEGQ (ADDQconst [c] y))) + // match: (SARQ x (NEGQ (ADDQconst [c] y))) // cond: c & 63 == 0 - // result: (SHLQ x (NEGQ y)) + // result: (SARQ x (NEGQ y)) for { x := v_0 if v_1.Op != OpAMD64NEGQ { @@ -26389,15 +30087,15 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 0) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLQ x (ANDQconst [c] y)) + // match: (SARQ x (ANDQconst [c] y)) // cond: c & 63 == 63 - // result: (SHLQ x y) + // result: (SARQ x y) for { x := v_0 if v_1.Op != OpAMD64ANDQconst { @@ -26408,13 +30106,13 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 63) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v.AddArg2(x, y) return true } - // match: (SHLQ x (NEGQ (ANDQconst [c] y))) + // match: (SARQ x (NEGQ (ANDQconst [c] y))) // cond: c & 63 == 63 - // result: (SHLQ x (NEGQ y)) + // result: (SARQ x (NEGQ y)) for { x := v_0 if v_1.Op != OpAMD64NEGQ { @@ -26430,15 +30128,15 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 63) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLQ x (ADDLconst [c] y)) + // match: (SARQ x (ADDLconst [c] y)) // cond: c & 63 == 0 - // result: (SHLQ x y) + // result: (SARQ x y) for { x := v_0 if v_1.Op != OpAMD64ADDLconst { @@ -26449,13 +30147,13 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 0) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v.AddArg2(x, y) return true } - // match: (SHLQ x (NEGL (ADDLconst [c] y))) + // match: (SARQ x (NEGL (ADDLconst [c] y))) // cond: c & 63 == 0 - // result: (SHLQ x (NEGL y)) + // result: (SARQ 
x (NEGL y)) for { x := v_0 if v_1.Op != OpAMD64NEGL { @@ -26471,15 +30169,15 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 0) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLQ x (ANDLconst [c] y)) + // match: (SARQ x (ANDLconst [c] y)) // cond: c & 63 == 63 - // result: (SHLQ x y) + // result: (SARQ x y) for { x := v_0 if v_1.Op != OpAMD64ANDLconst { @@ -26490,13 +30188,13 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 63) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v.AddArg2(x, y) return true } - // match: (SHLQ x (NEGL (ANDLconst [c] y))) + // match: (SARQ x (NEGL (ANDLconst [c] y))) // cond: c & 63 == 63 - // result: (SHLQ x (NEGL y)) + // result: (SARQ x (NEGL y)) for { x := v_0 if v_1.Op != OpAMD64NEGL { @@ -26512,15 +30210,15 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(c&63 == 63) { break } - v.reset(OpAMD64SHLQ) + v.reset(OpAMD64SARQ) v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) v0.AddArg(y) v.AddArg2(x, v0) return true } - // match: (SHLQ l:(MOVQload [off] {sym} ptr mem) x) + // match: (SARQ l:(MOVQload [off] {sym} ptr mem) x) // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHLXQload [off] {sym} ptr x mem) + // result: (SARXQload [off] {sym} ptr x mem) for { l := v_0 if l.Op != OpAMD64MOVQload { @@ -26534,7 +30232,7 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLXQload) + v.reset(OpAMD64SARXQload) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(ptr, x, mem) @@ -26542,9 +30240,9 @@ func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64SARQconst(v *Value) bool { v_0 := v.Args[0] - // match: (SHLQconst x [0]) + // match: (SARQconst x [0]) 
// result: x for { if auxIntToInt8(v.AuxInt) != 0 { @@ -26554,68 +30252,85 @@ func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { v.copyOf(x) return true } - // match: (SHLQconst [1] x) - // result: (ADDQ x x) + // match: (SARQconst [c] (MOVQconst [d])) + // result: (MOVQconst [d>>uint64(c)]) for { - if auxIntToInt8(v.AuxInt) != 1 { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - x := v_0 - v.reset(OpAMD64ADDQ) - v.AddArg2(x, x) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(d >> uint64(c)) return true } - // match: (SHLQconst [c] (ADDQ x x)) - // cond: c < 63 - // result: (SHLQconst [c+1] x) - for { - c := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64ADDQ { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] || !(c < 63) { + return false +} +func rewriteValueAMD64_OpAMD64SARW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SARW x (MOVQconst [c])) + // result: (SARWconst [int8(min(int64(c)&31,15))] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64SHLQconst) - v.AuxInt = int8ToAuxInt(c + 1) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SARWconst) + v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 15))) v.AddArg(x) return true } - // match: (SHLQconst [d] (MOVQconst [c])) - // result: (MOVQconst [c << uint64(d)]) + // match: (SARW x (MOVLconst [c])) + // result: (SARWconst [int8(min(int64(c)&31,15))] x) for { - d := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - c := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(c << uint64(d)) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SARWconst) + v.AuxInt = int8ToAuxInt(int8(min(int64(c)&31, 15))) + v.AddArg(x) return true } - // match: (SHLQconst [d] (MOVLconst [c])) - // result: (MOVQconst [int64(c) << uint64(d)]) + return false +} +func rewriteValueAMD64_OpAMD64SARWconst(v *Value) bool { + v_0 := 
v.Args[0] + // match: (SARWconst x [0]) + // result: x for { - d := auxIntToInt8(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + if auxIntToInt8(v.AuxInt) != 0 { break } - c := auxIntToInt32(v_0.AuxInt) + x := v_0 + v.copyOf(x) + return true + } + // match: (SARWconst [c] (MOVQconst [d])) + // result: (MOVQconst [int64(int16(d))>>uint64(c)]) + for { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { + break + } + d := auxIntToInt64(v_0.AuxInt) v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(c) << uint64(d)) + v.AuxInt = int64ToAuxInt(int64(int16(d)) >> uint64(c)) return true } return false } -func rewriteValueAMD64_OpAMD64SHLXLload(v *Value) bool { +func rewriteValueAMD64_OpAMD64SARXLload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SHLXLload [off] {sym} ptr (MOVLconst [c]) mem) - // result: (SHLLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) + // match: (SARXLload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SARLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -26625,7 +30340,7 @@ func rewriteValueAMD64_OpAMD64SHLXLload(v *Value) bool { } c := auxIntToInt32(v_1.AuxInt) mem := v_2 - v.reset(OpAMD64SHLLconst) + v.reset(OpAMD64SARLconst) v.AuxInt = int8ToAuxInt(int8(c & 31)) v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) v0.AuxInt = int32ToAuxInt(off) @@ -26636,14 +30351,14 @@ func rewriteValueAMD64_OpAMD64SHLXLload(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { +func rewriteValueAMD64_OpAMD64SARXQload(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SHLXQload [off] {sym} ptr (MOVQconst [c]) mem) - // result: (SHLQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + // match: (SARXQload [off] {sym} ptr (MOVQconst [c]) mem) + // result: (SARQconst [int8(c&63)] (MOVQload 
[off] {sym} ptr mem)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -26653,7 +30368,7 @@ func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { } c := auxIntToInt64(v_1.AuxInt) mem := v_2 - v.reset(OpAMD64SHLQconst) + v.reset(OpAMD64SARQconst) v.AuxInt = int8ToAuxInt(int8(c & 63)) v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) v0.AuxInt = int32ToAuxInt(off) @@ -26662,8 +30377,8 @@ func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { v.AddArg(v0) return true } - // match: (SHLXQload [off] {sym} ptr (MOVLconst [c]) mem) - // result: (SHLQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + // match: (SARXQload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SARQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -26673,7 +30388,7 @@ func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { } c := auxIntToInt32(v_1.AuxInt) mem := v_2 - v.reset(OpAMD64SHLQconst) + v.reset(OpAMD64SARQconst) v.AuxInt = int8ToAuxInt(int8(c & 63)) v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) v0.AuxInt = int32ToAuxInt(off) @@ -26684,1191 +30399,1019 @@ func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64SHRB(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SBBLcarrymask(v *Value) bool { v_0 := v.Args[0] - // match: (SHRB x (MOVQconst [c])) - // cond: c&31 < 8 - // result: (SHRBconst [int8(c&31)] x) + // match: (SBBLcarrymask (FlagEQ)) + // result: (MOVLconst [0]) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - if !(c&31 < 8) { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SHRBconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (SHRB x (MOVLconst [c])) - // cond: c&31 < 8 - // result: (SHRBconst [int8(c&31)] x) + // match: (SBBLcarrymask (FlagLT_ULT)) + // result: 
(MOVLconst [-1]) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - if !(c&31 < 8) { + if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64SHRBconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(-1) return true } - // match: (SHRB _ (MOVQconst [c])) - // cond: c&31 >= 8 + // match: (SBBLcarrymask (FlagLT_UGT)) // result: (MOVLconst [0]) for { - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - if !(c&31 >= 8) { + if v_0.Op != OpAMD64FlagLT_UGT { break } v.reset(OpAMD64MOVLconst) v.AuxInt = int32ToAuxInt(0) return true } - // match: (SHRB _ (MOVLconst [c])) - // cond: c&31 >= 8 - // result: (MOVLconst [0]) + // match: (SBBLcarrymask (FlagGT_ULT)) + // result: (MOVLconst [-1]) for { - if v_1.Op != OpAMD64MOVLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - if !(c&31 >= 8) { + if v_0.Op != OpAMD64FlagGT_ULT { break } v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.AuxInt = int32ToAuxInt(-1) return true } - return false -} -func rewriteValueAMD64_OpAMD64SHRBconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SHRBconst x [0]) - // result: x + // match: (SBBLcarrymask (FlagGT_UGT)) + // result: (MOVLconst [0]) for { - if auxIntToInt8(v.AuxInt) != 0 { + if v_0.Op != OpAMD64FlagGT_UGT { break } - x := v_0 - v.copyOf(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } return false } -func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool { +func rewriteValueAMD64_OpAMD64SBBQ(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (SHRL x (MOVQconst [c])) - // result: (SHRLconst [int8(c&31)] x) + // match: (SBBQ x (MOVQconst [c]) borrow) + // cond: is32Bit(c) + // result: (SBBQconst x [int32(c)] borrow) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SHRLconst) - v.AuxInt = int8ToAuxInt(int8(c & 
31)) - v.AddArg(x) - return true - } - // match: (SHRL x (MOVLconst [c])) - // result: (SHRLconst [int8(c&31)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + borrow := v_2 + if !(is32Bit(c)) { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SHRLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + v.reset(OpAMD64SBBQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg2(x, borrow) return true } - // match: (SHRL x (ADDQconst [c] y)) - // cond: c & 31 == 0 - // result: (SHRL x y) + // match: (SBBQ x y (FlagEQ)) + // result: (SUBQborrow x y) for { x := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 0) { + y := v_1 + if v_2.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SHRL) + v.reset(OpAMD64SUBQborrow) v.AddArg2(x, y) return true } - // match: (SHRL x (NEGQ (ADDQconst [c] y))) - // cond: c & 31 == 0 - // result: (SHRL x (NEGQ y)) + return false +} +func rewriteValueAMD64_OpAMD64SBBQcarrymask(v *Value) bool { + v_0 := v.Args[0] + // match: (SBBQcarrymask (FlagEQ)) + // result: (MOVQconst [0]) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDQconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 0) { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SHRL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(0) return true } - // match: (SHRL x (ANDQconst [c] y)) - // cond: c & 31 == 31 - // result: (SHRL x y) + // match: (SBBQcarrymask (FlagLT_ULT)) + // result: (MOVQconst [-1]) for { - x := v_0 - if v_1.Op != OpAMD64ANDQconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 31) { + if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(-1) return true } - // 
match: (SHRL x (NEGQ (ANDQconst [c] y))) - // cond: c & 31 == 31 - // result: (SHRL x (NEGQ y)) + // match: (SBBQcarrymask (FlagLT_UGT)) + // result: (MOVQconst [0]) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDQconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 31) { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64SHRL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(0) return true } - // match: (SHRL x (ADDLconst [c] y)) - // cond: c & 31 == 0 - // result: (SHRL x y) + // match: (SBBQcarrymask (FlagGT_ULT)) + // result: (MOVQconst [-1]) for { - x := v_0 - if v_1.Op != OpAMD64ADDLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 0) { + if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(-1) return true } - // match: (SHRL x (NEGL (ADDLconst [c] y))) - // cond: c & 31 == 0 - // result: (SHRL x (NEGL y)) + // match: (SBBQcarrymask (FlagGT_UGT)) + // result: (MOVQconst [0]) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDLconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 0) { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64SHRL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(0) return true } - // match: (SHRL x (ANDLconst [c] y)) - // cond: c & 31 == 31 - // result: (SHRL x y) + return false +} +func rewriteValueAMD64_OpAMD64SBBQconst(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SBBQconst x [c] (FlagEQ)) + // result: (SUBQconstborrow x [c]) for { + c := auxIntToInt32(v.AuxInt) x := v_0 - if v_1.Op != 
OpAMD64ANDLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&31 == 31) { + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + v.reset(OpAMD64SUBQconstborrow) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) return true } - // match: (SHRL x (NEGL (ANDLconst [c] y))) - // cond: c & 31 == 31 - // result: (SHRL x (NEGL y)) + return false +} +func rewriteValueAMD64_OpAMD64SETA(v *Value) bool { + v_0 := v.Args[0] + // match: (SETA (InvertFlags x)) + // result: (SETB x) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDLconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&31 == 31) { + if v_0.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64SHRL) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + x := v_0.Args[0] + v.reset(OpAMD64SETB) + v.AddArg(x) return true } - // match: (SHRL l:(MOVLload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHRXLload [off] {sym} ptr x mem) + // match: (SETA (FlagEQ)) + // result: (MOVLconst [0]) for { - l := v_0 - if l.Op != OpAMD64MOVLload { + if v_0.Op != OpAMD64FlagEQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETA (FlagLT_ULT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64SHRXLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SHRLconst [1] (ADDL x x)) - // result: (ANDLconst [0x7fffffff] x) + // 
match: (SETA (FlagLT_UGT)) + // result: (MOVLconst [1]) for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDL { + if v_0.Op != OpAMD64FlagLT_UGT { break } - x := v_0.Args[1] - if x != v_0.Args[0] { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETA (FlagGT_ULT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64ANDLconst) - v.AuxInt = int32ToAuxInt(0x7fffffff) - v.AddArg(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (SHRLconst x [0]) - // result: x + // match: (SETA (FlagGT_UGT)) + // result: (MOVLconst [1]) for { - if auxIntToInt8(v.AuxInt) != 0 { + if v_0.Op != OpAMD64FlagGT_UGT { break } - x := v_0 - v.copyOf(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } return false } -func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (SHRQ x (MOVQconst [c])) - // result: (SHRQconst [int8(c&63)] x) + typ := &b.Func.Config.Types + // match: (SETAE (TESTQ x x)) + // result: (ConstBool [true]) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64TESTQ { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64SHRQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v.AddArg(x) - return true - } - // match: (SHRQ x (MOVLconst [c])) - // result: (SHRQconst [int8(c&63)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SHRQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v.AddArg(x) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(true) return true } - // match: (SHRQ x (ADDQconst [c] y)) - // cond: c & 63 == 0 - // result: (SHRQ x y) + // match: (SETAE (TESTL x x)) + // result: (ConstBool [true]) for { - x := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64TESTL { 
break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&63 == 0) { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(true) return true } - // match: (SHRQ x (NEGQ (ADDQconst [c] y))) - // cond: c & 63 == 0 - // result: (SHRQ x (NEGQ y)) + // match: (SETAE (TESTW x x)) + // result: (ConstBool [true]) for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64TESTW { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 0) { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - v.reset(OpAMD64SHRQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(true) return true } - // match: (SHRQ x (ANDQconst [c] y)) - // cond: c & 63 == 63 - // result: (SHRQ x y) + // match: (SETAE (TESTB x x)) + // result: (ConstBool [true]) for { - x := v_0 - if v_1.Op != OpAMD64ANDQconst { + if v_0.Op != OpAMD64TESTB { break } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&63 == 63) { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(true) return true } - // match: (SHRQ x (NEGQ (ANDQconst [c] y))) - // cond: c & 63 == 63 - // result: (SHRQ x (NEGQ y)) - for { - x := v_0 - if v_1.Op != OpAMD64NEGQ { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDQconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 63) { - break - } - v.reset(OpAMD64SHRQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(y) - v.AddArg2(x, v0) - return true - } - // match: (SHRQ x (ADDLconst [c] y)) - // cond: c & 63 == 0 - // result: (SHRQ x y) - for { - x := v_0 - if v_1.Op != OpAMD64ADDLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := 
v_1.Args[0] - if !(c&63 == 0) { - break - } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) - return true - } - // match: (SHRQ x (NEGL (ADDLconst [c] y))) - // cond: c & 63 == 0 - // result: (SHRQ x (NEGL y)) + // match: (SETAE (BTLconst [0] x)) + // result: (XORLconst [1] (ANDLconst [1] x)) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ADDLconst { - break - } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - if !(c&63 == 0) { + if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 { break } - v.reset(OpAMD64SHRQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool) + v0.AuxInt = int32ToAuxInt(1) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (SHRQ x (ANDLconst [c] y)) - // cond: c & 63 == 63 - // result: (SHRQ x y) + // match: (SETAE (BTQconst [0] x)) + // result: (XORLconst [1] (ANDLconst [1] x)) for { - x := v_0 - if v_1.Op != OpAMD64ANDLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - y := v_1.Args[0] - if !(c&63 == 63) { + if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool) + v0.AuxInt = int32ToAuxInt(1) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (SHRQ x (NEGL (ANDLconst [c] y))) - // cond: c & 63 == 63 - // result: (SHRQ x (NEGL y)) + // match: (SETAE c:(CMPQconst [128] x)) + // cond: c.Uses == 1 + // result: (SETA (CMPQconst [127] x)) for { - x := v_0 - if v_1.Op != OpAMD64NEGL { - break - } - t := v_1.Type - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64ANDLconst { + c := v_0 + if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { break } - c := auxIntToInt32(v_1_0.AuxInt) - y := v_1_0.Args[0] - 
if !(c&63 == 63) { + x := c.Args[0] + if !(c.Uses == 1) { break } - v.reset(OpAMD64SHRQ) - v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64SETA) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (SHRQ l:(MOVQload [off] {sym} ptr mem) x) - // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) - // result: (SHRXQload [off] {sym} ptr x mem) + // match: (SETAE c:(CMPLconst [128] x)) + // cond: c.Uses == 1 + // result: (SETA (CMPLconst [127] x)) for { - l := v_0 - if l.Op != OpAMD64MOVQload { + c := v_0 + if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - x := v_1 - if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { + x := c.Args[0] + if !(c.Uses == 1) { break } - v.reset(OpAMD64SHRXQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64SETA) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SHRQconst [1] (ADDQ x x)) - // result: (BTRQconst [63] x) + // match: (SETAE (InvertFlags x)) + // result: (SETBE x) for { - if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDQ { - break - } - x := v_0.Args[1] - if x != v_0.Args[0] { + if v_0.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64BTRQconst) - v.AuxInt = int8ToAuxInt(63) + x := v_0.Args[0] + v.reset(OpAMD64SETBE) v.AddArg(x) return true } - // match: (SHRQconst x [0]) - // result: x - for { - if auxIntToInt8(v.AuxInt) != 0 { - break - } - x := v_0 - v.copyOf(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SHRW(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - 
// match: (SHRW x (MOVQconst [c])) - // cond: c&31 < 16 - // result: (SHRWconst [int8(c&31)] x) + // match: (SETAE (FlagEQ)) + // result: (MOVLconst [1]) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - if !(c&31 < 16) { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SHRWconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - // match: (SHRW x (MOVLconst [c])) - // cond: c&31 < 16 - // result: (SHRWconst [int8(c&31)] x) + // match: (SETAE (FlagLT_ULT)) + // result: (MOVLconst [0]) for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - if !(c&31 < 16) { + if v_0.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64SHRWconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v.AddArg(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (SHRW _ (MOVQconst [c])) - // cond: c&31 >= 16 - // result: (MOVLconst [0]) + // match: (SETAE (FlagLT_UGT)) + // result: (MOVLconst [1]) for { - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - if !(c&31 >= 16) { + if v_0.Op != OpAMD64FlagLT_UGT { break } v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.AuxInt = int32ToAuxInt(1) return true } - // match: (SHRW _ (MOVLconst [c])) - // cond: c&31 >= 16 + // match: (SETAE (FlagGT_ULT)) // result: (MOVLconst [0]) for { - if v_1.Op != OpAMD64MOVLconst { - break - } - c := auxIntToInt32(v_1.AuxInt) - if !(c&31 >= 16) { + if v_0.Op != OpAMD64FlagGT_ULT { break } v.reset(OpAMD64MOVLconst) v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64SHRWconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SHRWconst x [0]) - // result: x + // match: (SETAE (FlagGT_UGT)) + // result: (MOVLconst [1]) for { - if auxIntToInt8(v.AuxInt) != 0 { + if v_0.Op != OpAMD64FlagGT_UGT { break } - x := v_0 - v.copyOf(x) + 
v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } return false } -func rewriteValueAMD64_OpAMD64SHRXLload(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETAEstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SHRXLload [off] {sym} ptr (MOVLconst [c]) mem) - // result: (SHRLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) + // match: (SETAEstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETBEstore [off] {sym} ptr x mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + if v_1.Op != OpAMD64InvertFlags { break } - c := auxIntToInt32(v_1.AuxInt) + x := v_1.Args[0] mem := v_2 - v.reset(OpAMD64SHRLconst) - v.AuxInt = int8ToAuxInt(int8(c & 31)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg(v0) + v.reset(OpAMD64SETBEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SHRXQload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SHRXQload [off] {sym} ptr (MOVQconst [c]) mem) - // result: (SHRQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + // match: (SETAEstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETAEstore [off1+off2] {sym} base val mem) for { - off := auxIntToInt32(v.AuxInt) + off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64ADDQconst { break } - c := auxIntToInt64(v_1.AuxInt) + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64SHRQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - 
v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg(v0) + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (SHRXQload [off] {sym} ptr (MOVLconst [c]) mem) - // result: (SHRQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) + // match: (SETAEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETAEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - c := auxIntToInt32(v_1.AuxInt) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64SHRQconst) - v.AuxInt = int8ToAuxInt(int8(c & 63)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg(v0) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64SUBL(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (SUBL x (MOVLconst [c])) - // result: (SUBLconst x [c]) - for { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64SUBLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (SUBL (MOVLconst [c]) x) - // result: (NEGL (SUBLconst x [c])) + // match: (SETAEstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - 
if v_0.Op != OpAMD64MOVLconst { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - v.reset(OpAMD64NEGL) - v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type) - v0.AuxInt = int32ToAuxInt(c) - v0.AddArg(x) - v.AddArg(v0) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (SUBL x x) - // result: (MOVLconst [0]) + // match: (SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - x := v_0 - if x != v_1 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - // match: (SUBL x l:(MOVLload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (SUBLload x [off] {sym} ptr mem) + // match: (SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVLload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64SUBLload) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + 
v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SUBLconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SUBLconst [0] x) - // result: x + // match: (SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - if auxIntToInt32(v.AuxInt) != 0 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - x := v_0 - v.copyOf(x) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - // match: (SUBLconst [c] x) - // result: (ADDLconst [-c] x) + // match: (SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - v.reset(OpAMD64ADDLconst) - v.AuxInt = int32ToAuxInt(-c) - v.AddArg(x) + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { + break + } + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } + return false } -func rewriteValueAMD64_OpAMD64SUBLload(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETAstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SUBLload [off1] {sym} val (ADDQconst [off2] base) mem) + // match: (SETAstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETBstore [off] {sym} ptr x mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { + break + } + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg3(ptr, x, mem) + return true + } + // match: (SETAstore [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SUBLload [off1+off2] {sym} val base mem) + // result: (SETAstore [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SUBLload) + v.reset(OpAMD64SETAstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v.AddArg3(base, val, mem) return true } - // match: (SUBLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // match: (SETAstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SUBLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // result: (SETAstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64LEAQ { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SUBLload) + v.reset(OpAMD64SETAstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.AddArg3(base, val, mem) return true } - // match: (SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) - // result: (SUBL x (MOVLf2i y)) + // match: (SETAstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { off := 
auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { - break - } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SUBL) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32) - v0.AddArg(y) - v.AddArg2(x, v0) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SUBLmodify(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SUBLmodify [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SUBLmodify [off1+off2] {sym} base val mem) + // match: (SETAstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - off1 := auxIntToInt32(v.AuxInt) + off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETAstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64SUBLmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) 
+ v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (SUBLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (SETAstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETAstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64SUBLmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SUBQ(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETB(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (SUBQ x (MOVQconst [c])) - // cond: is32Bit(c) - // result: (SUBQconst x [int32(c)]) + // match: (SETB (TESTQ x x)) + // result: (ConstBool [false]) 
for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64TESTQ { break } - c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c)) { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - v.reset(OpAMD64SUBQconst) - v.AuxInt = int32ToAuxInt(int32(c)) - v.AddArg(x) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(false) return true } - // match: (SUBQ (MOVQconst [c]) x) - // cond: is32Bit(c) - // result: (NEGQ (SUBQconst x [int32(c)])) + // match: (SETB (TESTL x x)) + // result: (ConstBool [false]) for { - if v_0.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64TESTL { break } - c := auxIntToInt64(v_0.AuxInt) - x := v_1 - if !(is32Bit(c)) { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type) - v0.AuxInt = int32ToAuxInt(int32(c)) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(false) return true } - // match: (SUBQ x x) - // result: (MOVLconst [0]) + // match: (SETB (TESTW x x)) + // result: (ConstBool [false]) for { - x := v_0 - if x != v_1 { + if v_0.Op != OpAMD64TESTW { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + x := v_0.Args[1] + if x != v_0.Args[0] { + break + } + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(false) return true } - // match: (SUBQ x l:(MOVQload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (SUBQload x [off] {sym} ptr mem) + // match: (SETB (TESTB x x)) + // result: (ConstBool [false]) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVQload { + if v_0.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - v.reset(OpAMD64SUBQload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpConstBool) + v.AuxInt = boolToAuxInt(false) return true } - return false -} 
-func rewriteValueAMD64_OpAMD64SUBQborrow(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SUBQborrow x (MOVQconst [c])) - // cond: is32Bit(c) - // result: (SUBQconstborrow x [int32(c)]) + // match: (SETB (BTLconst [0] x)) + // result: (ANDLconst [1] x) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 { break } - c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c)) { + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(1) + v.AddArg(x) + return true + } + // match: (SETB (BTQconst [0] x)) + // result: (ANDQconst [1] x) + for { + if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 { break } - v.reset(OpAMD64SUBQconstborrow) - v.AuxInt = int32ToAuxInt(int32(c)) + x := v_0.Args[0] + v.reset(OpAMD64ANDQconst) + v.AuxInt = int32ToAuxInt(1) v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64SUBQconst(v *Value) bool { - v_0 := v.Args[0] - // match: (SUBQconst [0] x) - // result: x + // match: (SETB c:(CMPQconst [128] x)) + // cond: c.Uses == 1 + // result: (SETBE (CMPQconst [127] x)) for { - if auxIntToInt32(v.AuxInt) != 0 { + c := v_0 + if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { break } - x := v_0 - v.copyOf(x) + x := c.Args[0] + if !(c.Uses == 1) { + break + } + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (SUBQconst [c] x) - // cond: c != -(1<<31) - // result: (ADDQconst [-c] x) + // match: (SETB c:(CMPLconst [128] x)) + // cond: c.Uses == 1 + // result: (SETBE (CMPLconst [127] x)) for { - c := auxIntToInt32(v.AuxInt) - x := v_0 - if !(c != -(1 << 31)) { + c := v_0 + if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { break } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(-c) + x := c.Args[0] + if !(c.Uses == 1) { + break + } + v.reset(OpAMD64SETBE) + v0 := 
b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (SETB (InvertFlags x)) + // result: (SETA x) + for { + if v_0.Op != OpAMD64InvertFlags { + break + } + x := v_0.Args[0] + v.reset(OpAMD64SETA) v.AddArg(x) return true } - // match: (SUBQconst (MOVQconst [d]) [c]) - // result: (MOVQconst [d-int64(c)]) + // match: (SETB (FlagEQ)) + // result: (MOVLconst [0]) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64FlagEQ { break } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(d - int64(c)) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (SUBQconst (SUBQconst x [d]) [c]) - // cond: is32Bit(int64(-c)-int64(d)) - // result: (ADDQconst [-c-d] x) + // match: (SETB (FlagLT_ULT)) + // result: (MOVLconst [1]) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64SUBQconst { + if v_0.Op != OpAMD64FlagLT_ULT { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(is32Bit(int64(-c) - int64(d))) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETB (FlagLT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(-c - d) - v.AddArg(x) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETB (FlagGT_ULT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETB (FlagGT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } return false } -func rewriteValueAMD64_OpAMD64SUBQload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETBE(v 
*Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SUBQload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SUBQload [off1+off2] {sym} val base mem) + // match: (SETBE (InvertFlags x)) + // result: (SETAE x) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64InvertFlags { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + x := v_0.Args[0] + v.reset(OpAMD64SETAE) + v.AddArg(x) + return true + } + // match: (SETBE (FlagEQ)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SUBQload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - // match: (SUBQload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SUBQload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (SETBE (FlagLT_ULT)) + // result: (MOVLconst [1]) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64FlagLT_ULT { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETBE (FlagLT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64SUBQload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (SUBQload x [off] {sym} ptr (MOVSDstore [off] 
{sym} ptr y _)) - // result: (SUBQ x (MOVQf2i y)) + // match: (SETBE (FlagGT_ULT)) + // result: (MOVLconst [1]) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVSDstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { + if v_0.Op != OpAMD64FlagGT_ULT { break } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETBE (FlagGT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64SUBQ) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQf2i, typ.UInt64) - v0.AddArg(y) - v.AddArg2(x, v0) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } return false } -func rewriteValueAMD64_OpAMD64SUBQmodify(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETBEstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SUBQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (SETBEstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETAEstore [off] {sym} ptr x mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { + break + } + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + // match: (SETBEstore [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SUBQmodify [off1+off2] {sym} base val mem) + // result: (SETBEstore [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -27882,15 +31425,15 @@ func rewriteValueAMD64_OpAMD64SUBQmodify(v *Value) bool { if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SUBQmodify) + v.reset(OpAMD64SETBEstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) 
v.AddArg3(base, val, mem) return true } - // match: (SUBQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (SETBEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SUBQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (SETBEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) @@ -27905,25394 +31448,25103 @@ func rewriteValueAMD64_OpAMD64SUBQmodify(v *Value) bool { if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SUBQmodify) + v.reset(OpAMD64SETBEstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SUBSD(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (SUBSDload x [off] {sym} ptr mem) + // match: (SETBEstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVSDload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SUBSDload) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SUBSDload(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (SUBSDload [off1] 
{sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SUBSDload [off1+off2] {sym} val base mem) + // match: (SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - off1 := auxIntToInt32(v.AuxInt) + off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break - } - v.reset(OpAMD64SUBSDload) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (SUBSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SUBSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // match: (SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64SUBSDload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - // match: (SUBSDload x 
[off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) - // result: (SUBSD x (MOVQi2f y)) + // match: (SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVQstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { - break - } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64SUBSD) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQi2f, typ.Float64) - v0.AddArg(y) - v.AddArg2(x, v0) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64SUBSS(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (SUBSSload x [off] {sym} ptr mem) + // match: (SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVSSload { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64SUBSSload) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64SUBSSload(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETBstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] 
v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SUBSSload [off1] {sym} val (ADDQconst [off2] base) mem) + // match: (SETBstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETAstore [off] {sym} ptr x mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { + break + } + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETAstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) + return true + } + // match: (SETBstore [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) - // result: (SUBSSload [off1+off2] {sym} val base mem) + // result: (SETBstore [off1+off2] {sym} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64ADDQconst { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 mem := v_2 if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SUBSSload) + v.reset(OpAMD64SETBstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) + v.AddArg3(base, val, mem) return true } - // match: (SUBSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // match: (SETBstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (SUBSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) + // result: (SETBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := auxIntToInt32(v.AuxInt) sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64LEAQ { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 if 
!(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64SUBSSload) + v.reset(OpAMD64SETBstore) v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) + v.AddArg3(base, val, mem) return true } - // match: (SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) - // result: (SUBSS x (MOVLi2f y)) + // match: (SETBstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVLstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { - break - } - y := v_2.Args[1] - if ptr != v_2.Args[0] { + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64SUBSS) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLi2f, typ.Float32) - v0.AddArg(y) - v.AddArg2(x, v0) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64TESTB(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TESTB (MOVLconst [c]) x) - // result: (TESTBconst [int8(c)] x) + // match: (SETBstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - v.reset(OpAMD64TESTBconst) - v.AuxInt = int8ToAuxInt(int8(c)) - v.AddArg(x) - return true + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { + break } - break + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = 
int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) + return true } - // match: (TESTB l:(MOVBload {sym} [off] ptr mem) l2) - // cond: l == l2 && l.Uses == 2 && clobber(l) - // result: @l.Block (CMPBconstload {sym} [makeValAndOff(0, off)] ptr mem) + // match: (SETBstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - l := v_0 - if l.Op != OpAMD64MOVBload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - l2 := v_1 - if !(l == l2 && l.Uses == 2 && clobber(l)) { - continue - } - b = l.Block - v0 := b.NewValue0(l.Pos, OpAMD64CMPBconstload, types.TypeFlags) - v.copyOf(v0) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { + break } - break + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) + return true } - return false -} -func rewriteValueAMD64_OpAMD64TESTBconst(v *Value) bool { - v_0 := v.Args[0] - // match: (TESTBconst [-1] x) - // cond: x.Op != OpAMD64MOVLconst - // result: (TESTB x x) + // match: (SETBstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - if auxIntToInt8(v.AuxInt) != -1 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - x := v_0 - if !(x.Op != OpAMD64MOVLconst) { + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETBstore [off] {sym} ptr 
(FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64TESTB) - v.AddArg2(x, x) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64TESTL(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (TESTL (MOVLconst [c]) x) - // result: (TESTLconst [c] x) + // match: (SETEQ (TESTL (SHLL (MOVLconst [1]) x) y)) + // result: (SETAE (BTL x y)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - v.reset(OpAMD64TESTLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true + if v_0.Op != OpAMD64TESTL { + break } - break - } - // match: (TESTL l:(MOVLload {sym} [off] ptr mem) l2) - // cond: l == l2 && l.Uses == 2 && clobber(l) - // result: @l.Block (CMPLconstload {sym} [makeValAndOff(0, off)] ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - l := v_0 - if l.Op != OpAMD64MOVLload { + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLL { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - l2 := v_1 - if !(l == l2 && l.Uses == 2 && clobber(l)) { + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { continue } - b = l.Block - v0 := b.NewValue0(l.Pos, OpAMD64CMPLconstload, types.TypeFlags) - v.copyOf(v0) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) - 
v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + y := v_0_1 + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } break } - // match: (TESTL a:(ANDLload [off] {sym} x ptr mem) a) - // cond: a.Uses == 2 && a.Block == v.Block && clobber(a) - // result: (TESTL (MOVLload [off] {sym} ptr mem) x) + // match: (SETEQ (TESTQ (SHLQ (MOVQconst [1]) x) y)) + // result: (SETAE (BTQ x y)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - a := v_0 - if a.Op != OpAMD64ANDLload { + if v_0.Op != OpAMD64TESTQ { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLQ { continue } - off := auxIntToInt32(a.AuxInt) - sym := auxToSym(a.Aux) - mem := a.Args[2] - x := a.Args[0] - ptr := a.Args[1] - if a != v_1 || !(a.Uses == 2 && a.Block == v.Block && clobber(a)) { + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { continue } - v.reset(OpAMD64TESTL) - v0 := b.NewValue0(a.Pos, OpAMD64MOVLload, a.Type) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg2(v0, x) + y := v_0_1 + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64TESTLconst(v *Value) bool { - v_0 := v.Args[0] - // match: (TESTLconst [c] (MOVLconst [c])) - // cond: c == 0 - // result: (FlagEQ) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0.AuxInt) != c || !(c == 0) { - break - } - v.reset(OpAMD64FlagEQ) - return true - } - // match: (TESTLconst [c] (MOVLconst [c])) - // cond: c < 0 - // result: (FlagLT_UGT) + // match: (SETEQ (TESTLconst [c] x)) + // cond: isPowerOfTwo(uint32(c)) + // result: (SETAE (BTLconst [int8(log32u(uint32(c)))] x)) for 
{ - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0.AuxInt) != c || !(c < 0) { + if v_0.Op != OpAMD64TESTLconst { break } - v.reset(OpAMD64FlagLT_UGT) - return true - } - // match: (TESTLconst [c] (MOVLconst [c])) - // cond: c > 0 - // result: (FlagGT_UGT) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0.AuxInt) != c || !(c > 0) { + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint32(c))) { break } - v.reset(OpAMD64FlagGT_UGT) + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (TESTLconst [-1] x) - // cond: x.Op != OpAMD64MOVLconst - // result: (TESTL x x) + // match: (SETEQ (TESTQconst [c] x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETAE (BTQconst [int8(log32u(uint32(c)))] x)) for { - if auxIntToInt32(v.AuxInt) != -1 { + if v_0.Op != OpAMD64TESTQconst { break } - x := v_0 - if !(x.Op != OpAMD64MOVLconst) { + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint64(c))) { break } - v.reset(OpAMD64TESTL) - v.AddArg2(x, x) + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64TESTQ(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TESTQ (MOVQconst [c]) x) - // cond: is32Bit(c) - // result: (TESTQconst [int32(c)] x) + // match: (SETEQ (TESTQ (MOVQconst [c]) x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETAE (BTQconst [int8(log64u(uint64(c)))] x)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64TESTQ { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, 
v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64MOVQconst { continue } - c := auxIntToInt64(v_0.AuxInt) - x := v_1 - if !(is32Bit(c)) { + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(isPowerOfTwo(uint64(c))) { continue } - v.reset(OpAMD64TESTQconst) - v.AuxInt = int32ToAuxInt(int32(c)) - v.AddArg(x) + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v0.AddArg(x) + v.AddArg(v0) return true } break } - // match: (TESTQ l:(MOVQload {sym} [off] ptr mem) l2) - // cond: l == l2 && l.Uses == 2 && clobber(l) - // result: @l.Block (CMPQconstload {sym} [makeValAndOff(0, off)] ptr mem) + // match: (SETEQ (CMPLconst [1] s:(ANDLconst [1] _))) + // result: (SETNE (CMPLconst [0] s)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - l := v_0 - if l.Op != OpAMD64MOVQload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - l2 := v_1 - if !(l == l2 && l.Uses == 2 && clobber(l)) { - continue - } - b = l.Block - v0 := b.NewValue0(l.Pos, OpAMD64CMPQconstload, types.TypeFlags) - v.copyOf(v0) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + if v_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_0.AuxInt) != 1 { + break } - break + s := v_0.Args[0] + if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { + break + } + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg(v0) + return true } - // match: (TESTQ a:(ANDQload [off] {sym} x ptr mem) a) - // cond: a.Uses == 2 && a.Block == v.Block && clobber(a) - // result: (TESTQ (MOVQload [off] {sym} ptr mem) x) + // match: (SETEQ (CMPQconst [1] s:(ANDQconst [1] _))) + // result: (SETNE (CMPQconst [0] s)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - a := v_0 - if a.Op != OpAMD64ANDQload 
{ - continue - } - off := auxIntToInt32(a.AuxInt) - sym := auxToSym(a.Aux) - mem := a.Args[2] - x := a.Args[0] - ptr := a.Args[1] - if a != v_1 || !(a.Uses == 2 && a.Block == v.Block && clobber(a)) { - continue - } - v.reset(OpAMD64TESTQ) - v0 := b.NewValue0(a.Pos, OpAMD64MOVQload, a.Type) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - v.AddArg2(v0, x) - return true - } - break - } - return false -} -func rewriteValueAMD64_OpAMD64TESTQconst(v *Value) bool { - v_0 := v.Args[0] - // match: (TESTQconst [c] (MOVQconst [d])) - // cond: int64(c) == d && c == 0 - // result: (FlagEQ) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { - break - } - d := auxIntToInt64(v_0.AuxInt) - if !(int64(c) == d && c == 0) { - break - } - v.reset(OpAMD64FlagEQ) - return true - } - // match: (TESTQconst [c] (MOVQconst [d])) - // cond: int64(c) == d && c < 0 - // result: (FlagLT_UGT) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { - break - } - d := auxIntToInt64(v_0.AuxInt) - if !(int64(c) == d && c < 0) { - break - } - v.reset(OpAMD64FlagLT_UGT) - return true - } - // match: (TESTQconst [c] (MOVQconst [d])) - // cond: int64(c) == d && c > 0 - // result: (FlagGT_UGT) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_0.AuxInt) != 1 { break } - d := auxIntToInt64(v_0.AuxInt) - if !(int64(c) == d && c > 0) { + s := v_0.Args[0] + if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { break } - v.reset(OpAMD64FlagGT_UGT) + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg(v0) return true } - // match: (TESTQconst [-1] x) - // cond: x.Op != OpAMD64MOVQconst - // result: (TESTQ x x) + // match: (SETEQ (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) + // cond: z1==z2 + // result: (SETAE (BTQconst [63] x)) for { - if auxIntToInt32(v.AuxInt) != 
-1 { - break - } - x := v_0 - if !(x.Op != OpAMD64MOVQconst) { + if v_0.Op != OpAMD64TESTQ { break } - v.reset(OpAMD64TESTQ) - v.AddArg2(x, x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64TESTW(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (TESTW (MOVLconst [c]) x) - // result: (TESTWconst [int16(c)] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVLconst { + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { continue } - c := auxIntToInt32(v_0.AuxInt) - x := v_1 - v.reset(OpAMD64TESTWconst) - v.AuxInt = int16ToAuxInt(int16(c)) - v.AddArg(x) + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg(v0) return true } break } - // match: (TESTW l:(MOVWload {sym} [off] ptr mem) l2) - // cond: l == l2 && l.Uses == 2 && clobber(l) - // result: @l.Block (CMPWconstload {sym} [makeValAndOff(0, off)] ptr mem) + // match: (SETEQ (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) + // cond: z1==z2 + // result: (SETAE (BTQconst [31] x)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - l := v_0 - if l.Op != OpAMD64MOVWload { + if v_0.Op != OpAMD64TESTL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - l2 := v_1 - if !(l == l2 && l.Uses == 2 && clobber(l)) { + z1_0 := 
z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { continue } - b = l.Block - v0 := b.NewValue0(l.Pos, OpAMD64CMPWconstload, types.TypeFlags) - v.copyOf(v0) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64TESTWconst(v *Value) bool { - v_0 := v.Args[0] - // match: (TESTWconst [-1] x) - // cond: x.Op != OpAMD64MOVLconst - // result: (TESTW x x) + // match: (SETEQ (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) + // cond: z1==z2 + // result: (SETAE (BTQconst [0] x)) for { - if auxIntToInt16(v.AuxInt) != -1 { - break - } - x := v_0 - if !(x.Op != OpAMD64MOVLconst) { + if v_0.Op != OpAMD64TESTQ { break } - v.reset(OpAMD64TESTW) - v.AddArg2(x, x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VADDPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPD512load {sym} [off] x ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { continue } - v.reset(OpAMD64VADDPD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg3(x, ptr, mem) + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VADDPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPDMasked128load {sym} [off] x ptr mask mem) + // match: (SETEQ (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) + // cond: z1==z2 + // result: (SETAE (BTLconst [0] x)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64TESTL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { continue } - v.reset(OpAMD64VADDPDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VADDPDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VADDPDMasked256load {sym} [off] x ptr mask mem) + // match: (SETEQ (TESTQ z1:(SHRQconst [63] x) z2)) + // cond: z1==z2 + // result: (SETAE (BTQconst [63] x)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64TESTQ { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { continue } - v.reset(OpAMD64VADDPDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VADDPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPDMasked512load {sym} [off] x ptr mask mem) + // match: (SETEQ (TESTL z1:(SHRLconst [31] x) z2)) + // cond: z1==z2 + // result: (SETAE (BTLconst [31] x)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64TESTL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if 
!(canMergeLoad(v, l) && clobber(l)) { + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { continue } - v.reset(OpAMD64VADDPDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64SETAE) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VADDPS512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPS512load {sym} [off] x ptr mem) + // match: (SETEQ (InvertFlags x)) + // result: (SETEQ x) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64InvertFlags { + break + } + x := v_0.Args[0] + v.reset(OpAMD64SETEQ) + v.AddArg(x) + return true + } + // match: (SETEQ (FlagEQ)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagEQ { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETEQ (FlagLT_ULT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_ULT { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETEQ (FlagLT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETEQ (FlagGT_ULT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_ULT { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETEQ (FlagGT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETEQ (TESTQ s:(Select0 blsr:(BLSRQ _)) s)) + // 
result: (SETEQ (Select1 blsr)) + for { + if v_0.Op != OpAMD64TESTQ { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { continue } - v.reset(OpAMD64VADDPS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + v.AddArg(v0) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VADDPSMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPSMasked128load {sym} [off] x ptr mask mem) + // match: (SETEQ (TESTL s:(Select0 blsr:(BLSRL _)) s)) + // result: (SETEQ (Select1 blsr)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64TESTL { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRL || s != v_0_1 { continue } - v.reset(OpAMD64VADDPSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + v.AddArg(v0) return true } break } - return false -} -func 
rewriteValueAMD64_OpAMD64VADDPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPSMasked256load {sym} [off] x ptr mask mem) + // match: (SETEQ (VPTEST x:(VPAND128 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETEQ (VPTEST j k)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VADDPSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true - } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VADDPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VADDPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VADDPSMasked512load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VADDPSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPTEST { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x 
ptr mem) - for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPAND128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { break } - v.reset(OpAMD64VCMPPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETEQ (VPTEST x:(VPAND256 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETEQ (VPTEST j k)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VPTEST { break } - v.reset(OpAMD64VCMPPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) - for { - c := 
auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPAND256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { break } - v.reset(OpAMD64VCMPPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETEQ (VPTEST x:(VPANDD512 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETEQ (VPTEST j k)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPTEST { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDD512 { break } - v.reset(OpAMD64VCMPPDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool { - v_1 := v.Args[1] - v_0 := 
v.Args[0] - // match: (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETEQ (VPTEST x:(VPANDQ512 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETEQ (VPTEST j k)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPTEST { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDQ512 { break } - v.reset(OpAMD64VCMPPS512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETEQ (VPTEST x:(VPANDN128 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETB (VPTEST k j)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64VPTEST { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDN128 { break } - v.reset(OpAMD64VCMPPSMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - 
v.AddArg4(x, ptr, mask, mem) + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETEQ (VPTEST x:(VPANDN256 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETB (VPTEST k j)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPTEST { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDN256 { break } - v.reset(OpAMD64VCMPPSMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETEQ (VPTEST x:(VPANDND512 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETB (VPTEST k j)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != 
OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPTEST { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDND512 { break } - v.reset(OpAMD64VCMPPSMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTDQ2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PD512load {sym} [off] ptr mem) + // match: (SETEQ (VPTEST x:(VPANDNQ512 j k) y)) + // cond: x == y && x.Uses == 2 + // result: (SETB (VPTEST k j)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPTEST { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDNQ512 { break } - v.reset(OpAMD64VCVTDQ2PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTDQ2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PDMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ 
t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (SETNE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETNE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTDQ2PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTDQ2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PDMasked512load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (SETEQ flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETEQ) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTDQ2PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - 
v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PS512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTDQ2PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PS512load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (SETGE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETGE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTDQ2PS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PSMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTDQ2PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PSMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (SETLE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + 
x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETLE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTDQ2PSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PSMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTDQ2PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PSMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (SETG flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETG) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTDQ2PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTDQ2PSMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTDQ2PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTDQ2PSMasked512load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // 
result: (SETL flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETL) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTDQ2PSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPD2PS256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTPD2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPD2PS256load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (SETBE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETBE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPD2PS256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPD2PSMasked256(v *Value) 
bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTPD2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPD2PSMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (SETAE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETAE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPD2PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPD2PSXMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTPD2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPD2PSXMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (SETB flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if 
s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETB) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPD2PSXMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPD2PSYMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTPD2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPD2PSYMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (SETA flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETA) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPD2PSYMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPS2PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTPS2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPS2PD512load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (SETNE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { break 
} - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETNE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPS2PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPS2PDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTPS2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPS2PDMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (SETEQ flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETEQ) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPS2PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTPS2PDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(VCVTPS2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTPS2PDMasked512load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (SETGE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETGE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTPS2PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PD128load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (SETLE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
v.reset(OpAMD64SETLE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTQQ2PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PD256load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (SETG flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETG) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTQQ2PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PD512load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (SETL flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + 
for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETL) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTQQ2PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PDMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (SETBE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETBE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTQQ2PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PDMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETB 
flags)) x)) + // cond: t.Block == s.Block + // result: (SETAE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETAE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTQQ2PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PDMasked512load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (SETB flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETB) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PS256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTQQ2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PS256load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTL x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (SETA flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETA) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PS256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PSMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTQQ2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PSMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: (SETNE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != 
OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETNE) + v.AddArg(flags) + return true + } + break + } + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: (SETEQ flags) + for { + t := v_0 + if t.Op != OpAMD64TESTW { break } - v.reset(OpAMD64VCVTQQ2PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETEQ) + v.AddArg(flags) + return true + } + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PSX128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PSX128load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: (SETGE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETGE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PSX128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = 
symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PSXMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTQQ2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PSXMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: (SETLE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETLE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PSXMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PSY128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PSY128load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: (SETG flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, 
t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETG) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PSY128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTQQ2PSYMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTQQ2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTQQ2PSYMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: (SETL flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETL) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTQQ2PSYMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2DQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2DQ256 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2DQ256load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: (SETBE flags) for { - l := v_0 - if 
l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETBE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2DQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2DQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2DQMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2DQMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: (SETAE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETAE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2DQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func 
rewriteValueAMD64_OpAMD64VCVTTPD2DQXMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2DQXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2DQXMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: (SETB flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETB) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2DQXMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2DQYMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2DQYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2DQYMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: (SETA flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + 
if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETA) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2DQYMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2QQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2QQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2QQ128load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: (SETNE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETNE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2QQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2QQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2QQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2QQ256load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: (SETEQ flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := 
l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETEQ) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2QQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2QQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2QQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2QQ512load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: (SETGE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETGE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2QQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2QQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2QQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2QQMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: (SETLE flags) for { - l := v_0 - if l.Op 
!= OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETLE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2QQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2QQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2QQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2QQMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: (SETG flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETG) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2QQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2QQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2QQMasked512 l:(VMOVDQUload512 {sym} 
[off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2QQMasked512load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: (SETL flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETL) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2QQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UDQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2UDQ256 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UDQ256load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: (SETBE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETBE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2UDQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + 
break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UDQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2UDQMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UDQMasked256load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: (SETAE flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETAE) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2UDQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UDQX128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2UDQX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UDQX128load {sym} [off] ptr mem) + // match: (SETEQ t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: (SETB flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + 
continue + } + v.reset(OpAMD64SETB) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2UDQX128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UDQXMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2UDQXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UDQXMasked128load {sym} [off] ptr mask mem) + // match: (SETEQ t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: (SETA flags) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.reset(OpAMD64SETA) + v.AddArg(flags) + return true } - v.reset(OpAMD64VCVTTPD2UDQXMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } return false } -func rewriteValueAMD64_OpAMD64VCVTTPD2UDQY128(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VCVTTPD2UDQY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UDQY128load {sym} [off] ptr mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (SETEQstore [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) + // result: (SETAEstore [off] {sym} ptr (BTL x y) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) 
+ ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLL { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPD2UDQY128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UDQYMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2UDQYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UDQYMasked128load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) + // result: (SETAEstore [off] {sym} ptr (BTQ x y) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLQ { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := 
v_2 + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPD2UDQYMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UQQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2UQQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UQQ128load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (TESTLconst [c] x) mem) + // cond: isPowerOfTwo(uint32(c)) + // result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTLconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + c := auxIntToInt32(v_1.AuxInt) + x := v_1.Args[0] + mem := v_2 + if !(isPowerOfTwo(uint32(c))) { break } - v.reset(OpAMD64VCVTTPD2UQQ128load) + v.reset(OpAMD64SETAEstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UQQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2UQQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UQQ256load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (TESTQconst [c] x) mem) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem) for { - l := v_0 - 
if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + c := auxIntToInt32(v_1.AuxInt) + x := v_1.Args[0] + mem := v_2 + if !(isPowerOfTwo(uint64(c))) { break } - v.reset(OpAMD64VCVTTPD2UQQ256load) + v.reset(OpAMD64SETAEstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UQQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPD2UQQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UQQ512load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_1_0.AuxInt) + x := v_1_1 + mem := v_2 + if !(isPowerOfTwo(uint64(c))) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v0.AddArg(x) + v.AddArg3(ptr, v0, 
mem) + return true } - v.reset(OpAMD64VCVTTPD2UQQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UQQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2UQQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UQQMasked128load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) + // result: (SETNEstore [off] {sym} ptr (CMPLconst [0] s) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64CMPLconst || auxIntToInt32(v_1.AuxInt) != 1 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + s := v_1.Args[0] + if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { break } - v.reset(OpAMD64VCVTTPD2UQQMasked128load) + mem := v_2 + v.reset(OpAMD64SETNEstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UQQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2UQQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UQQMasked256load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) + // result: (SETNEstore [off] {sym} ptr (CMPQconst [0] s) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64CMPQconst || 
auxIntToInt32(v_1.AuxInt) != 1 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + s := v_1.Args[0] + if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { break } - v.reset(OpAMD64VCVTTPD2UQQMasked256load) + mem := v_2 + v.reset(OpAMD64SETNEstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPD2UQQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPD2UQQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPD2UQQMasked512load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) + // cond: z1==z2 + // result: (SETAEstore [off] {sym} ptr (BTQconst [63] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = 
int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPD2UQQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2DQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2DQ512load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) + // cond: z1==z2 + // result: (SETAEstore [off] {sym} ptr (BTLconst [31] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPS2DQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) + // cond: z1==z2 + // result: (SETAEstore [off] {sym} ptr (BTQconst [0] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPS2DQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) + // cond: z1==z2 + // result: (SETAEstore [off] {sym} ptr (BTLconst [0] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != 
OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPS2DQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2DQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2DQMasked512load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) + // cond: z1==z2 + // result: (SETAEstore [off] {sym} ptr (BTQconst [63] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + 
} + x := z1.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true } - v.reset(OpAMD64VCVTTPS2DQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2QQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2QQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2QQ256load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) + // cond: z1==z2 + // result: (SETAEstore [off] {sym} ptr (BTLconst [31] x) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + x := z1.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETAEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break + } + // match: (SETEQstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETEQstore [off] {sym} ptr x mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { break } - 
v.reset(OpAMD64VCVTTPS2QQ256load) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETEQstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2QQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2QQ512 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2QQ512load {sym} [off] ptr mem) + // match: (SETEQstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETEQstore [off1+off2] {sym} base val mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VCVTTPS2QQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64SETEQstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2QQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2QQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2QQMasked256load {sym} [off] ptr mask mem) + // match: (SETEQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETEQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - off := 
auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VCVTTPS2QQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64SETEQstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2QQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2QQMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2QQMasked512load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTTPS2QQMasked512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UDQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UDQ128load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: 
(MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64VCVTTPS2UDQ128load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UDQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2UDQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UDQ256load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VCVTTPS2UDQ256load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UDQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2UDQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UDQ512load {sym} [off] ptr mem) + // match: (SETEQstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr 
(MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64VCVTTPS2UDQ512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UDQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2UDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UDQMasked128load {sym} [off] ptr mask mem) + // match: (SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VCVTTPS2UDQMasked128load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VCVTTPS2UDQMasked256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETG(v *Value) bool { v_0 := v.Args[0] - // match: (VCVTTPS2UDQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VCVTTPS2UDQMasked256load {sym} [off] ptr mask mem) + // match: (SETG (InvertFlags x)) + // result: (SETL x) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64InvertFlags { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + v.reset(OpAMD64SETL) + v.AddArg(x) + return true + } + // match: (SETG (FlagEQ)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTTPS2UDQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UDQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2UDQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UDQMasked512load {sym} [off] ptr mask mem) + // match: (SETG (FlagLT_ULT)) + // result: (MOVLconst [0]) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64FlagLT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETG (FlagLT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VCVTTPS2UDQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UQQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTTPS2UQQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UQQ256load {sym} 
[off] ptr mem) + // match: (SETG (FlagGT_ULT)) + // result: (MOVLconst [1]) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETG (FlagGT_UGT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VCVTTPS2UQQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } return false } -func rewriteValueAMD64_OpAMD64VCVTTPS2UQQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETGE(v *Value) bool { v_0 := v.Args[0] - // match: (VCVTTPS2UQQ512 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UQQ512load {sym} [off] ptr mem) + b := v.Block + // match: (SETGE c:(CMPQconst [128] x)) + // cond: c.Uses == 1 + // result: (SETG (CMPQconst [127] x)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + c := v_0 + if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := c.Args[0] + if !(c.Uses == 1) { break } - v.reset(OpAMD64VCVTTPS2UQQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64SETG) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UQQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2UQQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VCVTTPS2UQQMasked256load {sym} [off] ptr mask mem) + // match: (SETGE c:(CMPLconst [128] x)) + // cond: c.Uses == 1 + // result: (SETG (CMPLconst [127] x)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + c := v_0 + if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := c.Args[0] + if !(c.Uses == 1) { break } - v.reset(OpAMD64VCVTTPS2UQQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64SETG) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTTPS2UQQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTTPS2UQQMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTTPS2UQQMasked512load {sym} [off] ptr mask mem) + // match: (SETGE (InvertFlags x)) + // result: (SETLE x) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64InvertFlags { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + v.reset(OpAMD64SETLE) + v.AddArg(x) + return true + } + // match: (SETGE (FlagEQ)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTTPS2UQQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUDQ2PD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - 
// result: (VCVTUDQ2PD256load {sym} [off] ptr mem) + // match: (SETGE (FlagLT_ULT)) + // result: (MOVLconst [0]) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64FlagLT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETGE (FlagLT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { + break } - v.reset(OpAMD64VCVTUDQ2PD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUDQ2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PD512load {sym} [off] ptr mem) + // match: (SETGE (FlagGT_ULT)) + // result: (MOVLconst [1]) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETGE (FlagGT_UGT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VCVTUDQ2PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } return false } -func rewriteValueAMD64_OpAMD64VCVTUDQ2PDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETGEstore(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VCVTUDQ2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VCVTUDQ2PDMasked256load {sym} [off] ptr mask mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (SETGEstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETLEstore [off] {sym} ptr x mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64VCVTUDQ2PDMasked256load) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETLEstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUDQ2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PDMasked512load {sym} [off] ptr mask mem) + // match: (SETGEstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETGEstore [off1+off2] {sym} base val mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VCVTUDQ2PDMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64SETGEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PS128(v *Value) bool 
{ - v_0 := v.Args[0] - // match: (VCVTUDQ2PS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PS128load {sym} [off] ptr mem) + // match: (SETGEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETGEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VCVTUDQ2PS128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64SETGEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PS256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUDQ2PS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PS256load {sym} [off] ptr mem) + // match: (SETGEstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTUDQ2PS256load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := 
b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PS512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUDQ2PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PS512load {sym} [off] ptr mem) + // match: (SETGEstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64VCVTUDQ2PS512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PSMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUDQ2PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PSMasked128load {sym} [off] ptr mask mem) + // match: (SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VCVTUDQ2PSMasked128load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - 
v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUDQ2PSMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUDQ2PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUDQ2PSMasked256load {sym} [off] ptr mask mem) + // match: (SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VCVTUDQ2PSMasked256load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VCVTUDQ2PSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETGstore(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VCVTUDQ2PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VCVTUDQ2PSMasked512load {sym} [off] ptr mask mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (SETGstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETLstore [off] {sym} ptr x mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64VCVTUDQ2PSMasked512load) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETLstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PD128load {sym} [off] ptr mem) + // match: (SETGstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETGstore [off1+off2] {sym} base val mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VCVTUQQ2PD128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64SETGstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUQQ2PD256 l:(VMOVDQUload256 {sym} [off] 
ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PD256load {sym} [off] ptr mem) + // match: (SETGstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETGstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VCVTUQQ2PD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64SETGstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUQQ2PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PD512load {sym} [off] ptr mem) + // match: (SETGstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTUQQ2PD512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + 
v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUQQ2PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PDMasked128load {sym} [off] ptr mask mem) + // match: (SETGstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64VCVTUQQ2PDMasked128load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUQQ2PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PDMasked256load {sym} [off] ptr mask mem) + // match: (SETGstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VCVTUQQ2PDMasked256load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - 
v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUQQ2PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PDMasked512load {sym} [off] ptr mask mem) + // match: (SETGstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETGstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VCVTUQQ2PDMasked512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VCVTUQQ2PS256(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETL(v *Value) bool { v_0 := v.Args[0] - // match: (VCVTUQQ2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PS256load {sym} [off] ptr mem) + b := v.Block + // match: 
(SETL c:(CMPQconst [128] x)) + // cond: c.Uses == 1 + // result: (SETLE (CMPQconst [127] x)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + c := v_0 + if c.Op != OpAMD64CMPQconst || auxIntToInt32(c.AuxInt) != 128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := c.Args[0] + if !(c.Uses == 1) { break } - v.reset(OpAMD64VCVTUQQ2PS256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PSMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUQQ2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PSMasked256load {sym} [off] ptr mask mem) + // match: (SETL c:(CMPLconst [128] x)) + // cond: c.Uses == 1 + // result: (SETLE (CMPLconst [127] x)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + c := v_0 + if c.Op != OpAMD64CMPLconst || auxIntToInt32(c.AuxInt) != 128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := c.Args[0] + if !(c.Uses == 1) { break } - v.reset(OpAMD64VCVTUQQ2PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PSX128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PSX128load {sym} [off] ptr 
mem) + // match: (SETL (InvertFlags x)) + // result: (SETG x) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64InvertFlags { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + v.reset(OpAMD64SETG) + v.AddArg(x) + return true + } + // match: (SETL (FlagEQ)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTUQQ2PSX128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PSXMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VCVTUQQ2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PSXMasked128load {sym} [off] ptr mask mem) + // match: (SETL (FlagLT_ULT)) + // result: (MOVLconst [1]) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64FlagLT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETL (FlagLT_UGT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VCVTUQQ2PSXMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - return false -} -func rewriteValueAMD64_OpAMD64VCVTUQQ2PSY128(v *Value) bool { - v_0 := v.Args[0] - // match: (VCVTUQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PSY128load {sym} [off] ptr mem) + // match: (SETL (FlagGT_ULT)) + // result: (MOVLconst 
[0]) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETL (FlagGT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VCVTUQQ2PSY128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } return false } -func rewriteValueAMD64_OpAMD64VCVTUQQ2PSYMasked128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETLE(v *Value) bool { v_0 := v.Args[0] - // match: (VCVTUQQ2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VCVTUQQ2PSYMasked128load {sym} [off] ptr mask mem) + // match: (SETLE (InvertFlags x)) + // result: (SETGE x) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64InvertFlags { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + v.reset(OpAMD64SETGE) + v.AddArg(x) + return true + } + // match: (SETLE (FlagEQ)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VCVTUQQ2PSYMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPD512load {sym} [off] x ptr mem) + // match: (SETLE (FlagLT_ULT)) + // result: (MOVLconst 
[1]) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64FlagLT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETLE (FlagLT_UGT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VDIVPD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPDMasked128load {sym} [off] x ptr mask mem) + // match: (SETLE (FlagGT_ULT)) + // result: (MOVLconst [0]) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) + return true + } + // match: (SETLE (FlagGT_UGT)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VDIVPDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } return false } -func rewriteValueAMD64_OpAMD64VDIVPDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETLEstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPDMasked256load 
{sym} [off] x ptr mask mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (SETLEstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETGEstore [off] {sym} ptr x mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64VDIVPDMasked256load) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETGEstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPDMasked512load {sym} [off] x ptr mask mem) + // match: (SETLEstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETLEstore [off1+off2] {sym} base val mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VDIVPDMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64SETLEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPS512(v *Value) 
bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPS512load {sym} [off] x ptr mem) + // match: (SETLEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETLEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VDIVPS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64SETLEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPSMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPSMasked128load {sym} [off] x ptr mask mem) + // match: (SETLEstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VDIVPSMasked128load) + mem 
:= v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPSMasked256load {sym} [off] x ptr mask mem) + // match: (SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VDIVPSMasked256load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VDIVPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VDIVPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // 
cond: canMergeLoad(v, l) && clobber(l) - // result: (VDIVPSMasked512load {sym} [off] x ptr mask mem) + // match: (SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) + return true + } + // match: (SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VDIVPSMasked512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VFMADD213PD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64SETLstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PD512load {sym} [off] x y ptr mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (SETLstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETGstore [off] {sym} ptr x mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := 
l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { break } - v.reset(OpAMD64VFMADD213PD512load) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETGstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem) + // match: (SETLstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETLstore [off1+off2] {sym} base val mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VFMADD213PDMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64SETLstore) + v.AuxInt = int32ToAuxInt(off1 + off2) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem) + // match: 
(SETLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VFMADD213PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64SETLstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem) + // match: (SETLstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VFMADD213PDMasked512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + 
v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PS512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PS512load {sym} [off] x y ptr mem) + // match: (SETLstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_ULT { break } - v.reset(OpAMD64VFMADD213PS512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PSMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem) + // match: (SETLstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != 
OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VFMADD213PSMasked128load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PSMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem) + // match: (SETLstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - v.reset(OpAMD64VFMADD213PSMasked256load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADD213PSMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADD213PSMasked512load {sym} [off] x y ptr mask mem) + // match: (SETLstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != 
OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VFMADD213PSMasked512load) + mem := v_2 + v.reset(OpAMD64MOVBstore) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VFMADDSUB213PD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool { v_0 := v.Args[0] - // match: (VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PD512load {sym} [off] x y ptr mem) + b := v.Block + // match: (SETNE (TESTBconst [1] x)) + // result: (ANDLconst [1] x) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64TESTBconst || auxIntToInt8(v_0.AuxInt) != 1 { break } - v.reset(OpAMD64VFMADDSUB213PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(1) + v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PDMasked128load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTWconst [1] x)) + // 
result: (ANDLconst [1] x) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64TESTWconst || auxIntToInt16(v_0.AuxInt) != 1 { break } - v.reset(OpAMD64VFMADDSUB213PDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(1) + v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PDMasked256load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTL (SHLL (MOVLconst [1]) x) y)) + // result: (SETB (BTL x y)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMADDSUB213PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func 
rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PDMasked512load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTQ (SHLQ (MOVQconst [1]) x) y)) + // result: (SETB (BTQ x y)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMADDSUB213PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VFMADDSUB213PS512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PS512load {sym} [off] x y ptr mem) + // match: (SETNE (TESTLconst [c] x)) + // cond: isPowerOfTwo(uint32(c)) + // result: (SETB (BTLconst [int8(log32u(uint32(c)))] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64TESTLconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if 
!(canMergeLoad(v, l) && clobber(l)) { + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint32(c))) { break } - v.reset(OpAMD64VFMADDSUB213PS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTQconst [c] x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETB (BTQconst [int8(log32u(uint32(c)))] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64TESTQconst { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint64(c))) { break } - v.reset(OpAMD64VFMADDSUB213PSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem) + // match: 
(SETNE (TESTQ (MOVQconst [c]) x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETB (BTQconst [int8(log64u(uint64(c)))] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64TESTQ { break } - v.reset(OpAMD64VFMADDSUB213PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMADDSUB213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMADDSUB213PSMasked512load {sym} [off] x y ptr mask mem) + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(isPowerOfTwo(uint64(c))) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v0.AddArg(x) + v.AddArg(v0) + return true + } + break + } + // match: (SETNE (CMPLconst [1] s:(ANDLconst [1] _))) + // result: (SETEQ (CMPLconst [0] s)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64CMPLconst || auxIntToInt32(v_0.AuxInt) != 1 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + s := v_0.Args[0] + if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { break } - v.reset(OpAMD64VFMADDSUB213PSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PD512load {sym} [off] x y ptr mem) + // match: (SETNE (CMPQconst [1] s:(ANDQconst [1] _))) + // result: (SETEQ (CMPQconst [0] s)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64CMPQconst || auxIntToInt32(v_0.AuxInt) != 1 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + s := v_0.Args[0] + if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { break } - v.reset(OpAMD64VFMSUBADD213PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg(v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PDMasked128load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) + // cond: z1==z2 + // result: (SETB (BTQconst [63] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if 
!(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMSUBADD213PDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PDMasked256load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) + // cond: z1==z2 + // result: (SETB (BTQconst [31] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, 
OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMSUBADD213PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PDMasked512load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) + // cond: z1==z2 + // result: (SETB (BTQconst [0] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMSUBADD213PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PS512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PS512load {sym} [off] x y ptr mem) + // match: (SETNE (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) + // cond: z1==z2 + // result: (SETB (BTLconst [0] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMSUBADD213PS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTQ z1:(SHRQconst [63] x) z2)) + // cond: z1==z2 + // result: (SETB (BTQconst [63] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := 
v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMSUBADD213PSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem) + // match: (SETNE (TESTL z1:(SHRLconst [31] x) z2)) + // cond: z1==z2 + // result: (SETB (BTLconst [31] x)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg(v0) + return true } - v.reset(OpAMD64VFMSUBADD213PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) - return true + break } - return false -} -func 
rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VFMSUBADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VFMSUBADD213PSMasked512load {sym} [off] x y ptr mask mem) + // match: (SETNE (InvertFlags x)) + // result: (SETNE x) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64InvertFlags { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + v.reset(OpAMD64SETNE) + v.AddArg(x) + return true + } + // match: (SETNE (FlagEQ)) + // result: (MOVLconst [0]) + for { + if v_0.Op != OpAMD64FlagEQ { break } - v.reset(OpAMD64VFMSUBADD213PSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETNE (FlagLT_ULT)) + // result: (MOVLconst [1]) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64FlagLT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETNE (FlagLT_UGT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagLT_UGT { break } - v.reset(OpAMD64VGF2P8AFFINEINVQB128load) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETNE (FlagGT_ULT)) + // result: (MOVLconst [1]) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64FlagGT_ULT { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) + return true + } + // match: (SETNE (FlagGT_UGT)) + // result: (MOVLconst [1]) + for { + if v_0.Op != OpAMD64FlagGT_UGT { break } - v.reset(OpAMD64VGF2P8AFFINEINVQB256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(1) return true } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETNE (TESTQ s:(Select0 blsr:(BLSRQ _)) s)) + // result: (SETNE (Select1 blsr)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ 
= v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { + continue + } + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + v.AddArg(v0) + return true } - v.reset(OpAMD64VGF2P8AFFINEINVQB512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETNE (TESTL s:(Select0 blsr:(BLSRL _)) s)) + // result: (SETNE (Select1 blsr)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64TESTL { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRL || s != v_0_1 { + continue + } + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + v.AddArg(v0) + return true } - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + break } - return false -} -func 
rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] 
- ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEQB128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 
{sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEQB256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue 
+ } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEQB512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEQBMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETA 
flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - v.reset(OpAMD64VGF2P8AFFINEQBMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: s for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - 
v.reset(OpAMD64VGF2P8AFFINEQBMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPD512load {sym} [off] x ptr mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETAE { continue } - v.reset(OpAMD64VMAXPD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPDMasked128load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, 
t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETBE { continue } - v.reset(OpAMD64VMAXPDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPDMasked256load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETEQF { continue } - v.reset(OpAMD64VMAXPDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPDMasked512load 
{sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETNEF { continue } - v.reset(OpAMD64VMAXPDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPS512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPS512load {sym} [off] x ptr mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETGF { continue } - v.reset(OpAMD64VMAXPS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPSMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPSMasked128load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTQ x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTQ { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETGEF { continue } - v.reset(OpAMD64VMAXPSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPSMasked256load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - 
off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETEQ { continue } - v.reset(OpAMD64VMAXPSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMAXPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMAXPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMAXPSMasked512load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETNE { continue } - v.reset(OpAMD64VMAXPSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPD512load {sym} [off] x ptr mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: s for 
{ - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETL { continue } - v.reset(OpAMD64VMINPD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPDMasked128load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETG { continue } - v.reset(OpAMD64VMINPDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPDMasked256(v 
*Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPDMasked256load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETLE { continue } - v.reset(OpAMD64VMINPDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPDMasked512load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask 
:= v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETGE { continue } - v.reset(OpAMD64VMINPDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPS512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPS512load {sym} [off] x ptr mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETA { continue } - v.reset(OpAMD64VMINPS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPSMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPSMasked128load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + t := 
v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETB { continue } - v.reset(OpAMD64VMINPSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPSMasked256load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETAE { continue } - v.reset(OpAMD64VMINPSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMINPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMINPSMasked512 x 
l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMINPSMasked512load {sym} [off] x ptr mask mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + t := v_0 + if t.Op != OpAMD64TESTL { + break + } + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + s := x.Args[0] + if s.Op != OpAMD64SETBE { continue } - v.reset(OpAMD64VMINPSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64VMOVD(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VMOVD x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVDload [off] {sym} ptr mem) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - x := v_0 - if x.Op != OpAMD64MOVLload { + t := v_0 + if t.Op != OpAMD64TESTL { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVDload, v.Type) 
- v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) - return true + break } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQU16Masked128 (VPABSW128 x) mask) - // result: (VPABSWMasked128 x mask) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPABSW128 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSWMasked128) - v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPADDW128 x y) mask) - // result: (VPADDWMasked128 x y mask) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPADDW128 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPADDSW128 x y) mask) - // result: (VPADDSWMasked128 x y mask) + // match: (SETNE t:(TESTL x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPADDSW128 { + t := v_0 + if t.Op != OpAMD64TESTL { break } - y := 
v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDSWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPADDUSW128 x y) mask) - // result: (VPADDUSWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPADDUSW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDUSWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPAVGW128 x y) mask) - // result: (VPAVGWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETNE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPAVGW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPAVGWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) - // 
result: (VPBROADCASTWMasked128 x mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETL flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPBROADCASTW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked128) - v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPERMI2W128 x y z) mask) - // result: (VPERMI2WMasked128 x y z mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETG flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPERMI2W128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2WMasked128) - v.AddArg4(x, y, z, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMADDWD128 x y) mask) - // result: (VPMADDWDMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETLE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMADDWD128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDWDMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != 
OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) - // result: (VPMADDUBSWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETGE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMADDUBSW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDUBSWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) - // result: (VPMOVSXWQMasked128 x mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETA flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMOVSXWQ128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWQMasked128) - v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMOVZXWQ128 x) mask) - // result: (VPMOVZXWQMasked128 x mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETB flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMOVZXWQ128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPMOVZXWQMasked128) - v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) - // result: (VPMOVSXWDMasked128 x mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETAE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMOVSXWD128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWDMasked128) - v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) - // result: (VPMOVZXWDMasked128 x mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETBE flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMOVZXWD128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWDMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked128 (VPMAXSW128 x y) mask) - // result: (VPMAXSWMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMAXSW128 { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - y := 
v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSWMasked128) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU16Masked128 (VPMAXUW128 x y) mask) - // result: (VPMAXUWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETEQF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMAXUW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUWMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked128 (VPMINSW128 x y) mask) - // result: (VPMINSWMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMINSW128 { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSWMasked128) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU16Masked128 (VPMINUW128 x y) mask) - // result: (VPMINUWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETNEF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMINUW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUWMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked128 (VPMULHW128 x y) mask) - // result: (VPMULHWMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMULHW128 { - break + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } 
+ v.copyOf(s) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHWMasked128) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU16Masked128 (VPMULHUW128 x y) mask) - // result: (VPMULHUWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETGF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMULHUW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHUWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMULLW128 x y) mask) - // result: (VPMULLWMasked128 x y mask) + // match: (SETNE t:(TESTW x:(MOVBQZX s:(SETGEF flags)) x)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMULLW128 { + t := v_0 + if t.Op != OpAMD64TESTW { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + if x != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPOPCNTW128 x) mask) - // result: (VPOPCNTWMasked128 x mask) + // match: (SETNE t:(TESTB s:(SETEQ flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPOPCNTW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) 
- v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPERMW128 x y) mask) - // result: (VPERMWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETNE flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPERMW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) - // result: (VPMOVSWBMasked128_128 x mask) + // match: (SETNE t:(TESTB s:(SETL flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMOVSWB128_128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked128_128) - v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) - // result: (VPMOVUSWBMasked128_128 x mask) + // match: (SETNE t:(TESTB s:(SETG flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPMOVUSWB128_128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked128_128) - 
v.AddArg2(x, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSHLDW128 [a] x y) mask) - // result: (VPSHLDWMasked128 [a] x y mask) + // match: (SETNE t:(TESTB s:(SETLE flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSHLDW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSLLW128 x y) mask) - // result: (VPSLLWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETGE flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSLLW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSHRDW128 [a] x y) mask) - // result: (VPSHRDWMasked128 [a] x y mask) + // match: (SETNE t:(TESTB s:(SETA flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSHRDW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - a := 
auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSRAW128 x y) mask) - // result: (VPSRAWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETB flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSRAW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSRLW128 x y) mask) - // result: (VPSRLWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETAE flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSRLW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSHLDVW128 x y z) mask) - // result: (VPSHLDVWMasked128 x y z mask) + // match: (SETNE t:(TESTB s:(SETBE flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != 
OpAMD64VPSHLDVW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVWMasked128) - v.AddArg4(x, y, z, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSLLVW128 x y) mask) - // result: (VPSLLVWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETEQF flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSLLVW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQF { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSHRDVW128 x y z) mask) - // result: (VPSHRDVWMasked128 x y z mask) + // match: (SETNE t:(TESTB s:(SETNEF flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSHRDVW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVWMasked128) - v.AddArg4(x, y, z, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNEF { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSRAVW128 x y) mask) - // result: (VPSRAVWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETGF flags) s)) + 
// cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSRAVW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGF { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSRLVW128 x y) mask) - // result: (VPSRLVWMasked128 x y mask) + // match: (SETNE t:(TESTB s:(SETGEF flags) s)) + // cond: t.Block == s.Block + // result: s for { - if v_0.Op != OpAMD64VPSRLVW128 { + t := v_0 + if t.Op != OpAMD64TESTB { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVWMasked128) - v.AddArg3(x, y, mask) - return true + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGEF { + continue + } + if s != t_1 || !(t.Block == s.Block) { + continue + } + v.copyOf(s) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSUBW128 x y) mask) - // result: (VPSUBWMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SETNEstore [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) + // result: (SETBstore [off] {sym} ptr (BTL x y) mem) for { - if v_0.Op != OpAMD64VPSUBW128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBWMasked128) - v.AddArg3(x, y, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, 
v_1_0 { + if v_1_0.Op != OpAMD64SHLL { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSUBSW128 x y) mask) - // result: (VPSUBSWMasked128 x y mask) + // match: (SETNEstore [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) + // result: (SETBstore [off] {sym} ptr (BTQ x y) mem) for { - if v_0.Op != OpAMD64VPSUBSW128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBSWMasked128) - v.AddArg3(x, y, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64SHLQ { + continue + } + x := v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + if v_1_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0_0.AuxInt) != 1 { + continue + } + y := v_1_1 + mem := v_2 + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSUBUSW128 x y) mask) - // result: (VPSUBUSWMasked128 x y mask) + // match: (SETNEstore [off] {sym} ptr (TESTLconst [c] x) mem) + // cond: isPowerOfTwo(uint32(c)) + // result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem) for { - if v_0.Op != OpAMD64VPSUBUSW128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPSUBUSWMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) - // result: (VPMOVWBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVWB128_128 { + c := auxIntToInt32(v_1.AuxInt) + x := v_1.Args[0] + mem := v_2 + if !(isPowerOfTwo(uint32(c))) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVWBMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked128 (VPSHUFHW128 [a] x) mask) - // result: (VPSHUFHWMasked128 [a] x mask) + // match: (SETNEstore [off] {sym} ptr (TESTQconst [c] x) mem) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETBstore [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem) for { - if v_0.Op != OpAMD64VPSHUFHW128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFHWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked128 (VPSHUFLW128 [a] x) mask) - // result: (VPSHUFLWMasked128 [a] x mask) - for { - if v_0.Op != OpAMD64VPSHUFLW128 { + c := auxIntToInt32(v_1.AuxInt) + x := v_1.Args[0] + mem := v_2 + if !(isPowerOfTwo(uint64(c))) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFLWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked128 (VPSLLW128const [a] x) 
mask) - // result: (VPSLLWMasked128const [a] x mask) + // match: (SETNEstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) + // cond: isPowerOfTwo(uint64(c)) + // result: (SETBstore [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem) for { - if v_0.Op != OpAMD64VPSLLW128const { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + if v_1_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_1_0.AuxInt) + x := v_1_1 + mem := v_2 + if !(isPowerOfTwo(uint64(c))) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) - // result: (VPSRLWMasked128const [a] x mask) + // match: (SETNEstore [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) + // result: (SETEQstore [off] {sym} ptr (CMPLconst [0] s) mem) for { - if v_0.Op != OpAMD64VPSRLW128const { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64CMPLconst || auxIntToInt32(v_1.AuxInt) != 1 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) - // result: (VPSRAWMasked128const [a] x mask) - for { - if v_0.Op != OpAMD64VPSRAW128const { + s := v_1.Args[0] + if s.Op != OpAMD64ANDLconst || auxIntToInt32(s.AuxInt) != 1 { break } - a := 
auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + mem := v_2 + v.reset(OpAMD64SETEQstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg3(ptr, v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQU16Masked256 (VPABSW256 x) mask) - // result: (VPABSWMasked256 x mask) + // match: (SETNEstore [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) + // result: (SETEQstore [off] {sym} ptr (CMPQconst [0] s) mem) for { - if v_0.Op != OpAMD64VPABSW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64CMPQconst || auxIntToInt32(v_1.AuxInt) != 1 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSWMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPADDW256 x y) mask) - // result: (VPADDWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPADDW256 { + s := v_1.Args[0] + if s.Op != OpAMD64ANDQconst || auxIntToInt32(s.AuxInt) != 1 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDWMasked256) - v.AddArg3(x, y, mask) + mem := v_2 + v.reset(OpAMD64SETEQstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(s) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked256 (VPADDSW256 x y) mask) - // result: (VPADDSWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) + // cond: z1==z2 + // result: (SETBstore [off] {sym} ptr (BTQconst [63] x) mem) for { - if v_0.Op != OpAMD64VPADDSW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) 
+ ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDSWMasked256) - v.AddArg3(x, y, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked256 (VPADDUSW256 x y) mask) - // result: (VPADDUSWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) + // cond: z1==z2 + // result: (SETBstore [off] {sym} ptr (BTLconst [31] x) mem) for { - if v_0.Op != OpAMD64VPADDUSW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDUSWMasked256) - v.AddArg3(x, y, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + 
v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked256 (VPAVGW256 x y) mask) - // result: (VPAVGWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) + // cond: z1==z2 + // result: (SETBstore [off] {sym} ptr (BTQconst [0] x) mem) for { - if v_0.Op != OpAMD64VPAVGW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPAVGWMasked256) - v.AddArg3(x, y, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) - // result: (VPBROADCASTWMasked256 x mask) + // match: (SETNEstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) + // cond: z1==z2 + // result: (SETBstore [off] {sym} ptr (BTLconst [0] x) mem) for { - if v_0.Op != OpAMD64VPBROADCASTW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked256) - v.AddArg2(x, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 
31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked256 (VPERMI2W256 x y z) mask) - // result: (VPERMI2WMasked256 x y z mask) + // match: (SETNEstore [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) + // cond: z1==z2 + // result: (SETBstore [off] {sym} ptr (BTQconst [63] x) mem) for { - if v_0.Op != OpAMD64VPERMI2W256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTQ { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2WMasked256) - v.AddArg4(x, y, z, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + x := z1.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked256 (VPMADDWD256 x y) mask) - // result: (VPMADDWDMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) + // cond: z1==z2 + // result: (SETBstore [off] {sym} ptr (BTLconst [31] x) mem) for { - if v_0.Op != OpAMD64VPMADDWD256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64TESTL { break } - y := v_0.Args[1] - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDWDMasked256) - v.AddArg3(x, y, mask) - return true + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 { + z1 := v_1_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + x := z1.Args[0] + z2 := v_1_1 + mem := v_2 + if !(z1 == z2) { + continue + } + v.reset(OpAMD64SETBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } + break } - // match: (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) - // result: (VPMADDUBSWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (InvertFlags x) mem) + // result: (SETNEstore [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64VPMADDUBSW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64InvertFlags { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDUBSWMasked256) - v.AddArg3(x, y, mask) + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64SETNEstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (VMOVDQU16Masked256 (VPMOVSXWQ256 x) mask) - // result: (VPMOVSXWQMasked256 x mask) + // match: (SETNEstore [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SETNEstore [off1+off2] {sym} base val mem) for { - if v_0.Op != OpAMD64VPMOVSXWQ256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWQMasked256) - v.AddArg2(x, mask) + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64SETNEstore) + v.AuxInt = int32ToAuxInt(off1 + 
off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (VMOVDQU16Masked256 (VPMOVZXWQ256 x) mask) - // result: (VPMOVZXWQMasked256 x mask) + // match: (SETNEstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SETNEstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - if v_0.Op != OpAMD64VPMOVZXWQ256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWQMasked256) - v.AddArg2(x, mask) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64SETNEstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) - // result: (VPMOVSXWDMasked256 x mask) + // match: (SETNEstore [off] {sym} ptr (FlagEQ) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [0]) mem) for { - if v_0.Op != OpAMD64VPMOVSXWD256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagEQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWDMasked256) - v.AddArg2(x, mask) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(0) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) - // result: (VPMOVZXWDMasked256 x mask) + // match: (SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - if v_0.Op != OpAMD64VPMOVZXWD256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != 
OpAMD64FlagLT_ULT { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWDMasked256) - v.AddArg2(x, mask) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked256 (VPMAXSW256 x y) mask) - // result: (VPMAXSWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - if v_0.Op != OpAMD64VPMAXSW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagLT_UGT { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSWMasked256) - v.AddArg3(x, y, mask) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked256 (VPMAXUW256 x y) mask) - // result: (VPMAXUWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - if v_0.Op != OpAMD64VPMAXUW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_ULT { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUWMasked256) - v.AddArg3(x, y, mask) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked256 (VPMINSW256 x y) mask) - // result: (VPMINSWMasked256 x y mask) + // match: (SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) + // result: (MOVBstore [off] {sym} ptr (MOVLconst [1]) mem) for { - if v_0.Op != 
OpAMD64VPMINSW256 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMINUW256 x y) mask) - // result: (VPMINUWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPMINUW256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64FlagGT_UGT { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUWMasked256) - v.AddArg3(x, y, mask) + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLconst, typ.UInt8) + v0.AuxInt = int32ToAuxInt(1) + v.AddArg3(ptr, v0, mem) return true } - // match: (VMOVDQU16Masked256 (VPMULHW256 x y) mask) - // result: (VPMULHWMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHLL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHLL x (MOVQconst [c])) + // result: (SHLLconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPMULHW256 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHWMasked256) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked256 (VPMULHUW256 x y) mask) - // result: (VPMULHUWMasked256 x y mask) + // match: (SHLL x (MOVLconst [c])) + // result: (SHLLconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPMULHUW256 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHUWMasked256) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked256 (VPMULLW256 x y) mask) - // result: (VPMULLWMasked256 x y mask) + // match: 
(SHLL x (ADDQconst [c] y)) + // cond: c & 31 == 0 + // result: (SHLL x y) for { - if v_0.Op != OpAMD64VPMULLW256 { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPOPCNTW256 x) mask) - // result: (VPOPCNTWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPOPCNTW256 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked256 (VPERMW256 x y) mask) - // result: (VPERMWMasked256 x y mask) + // match: (SHLL x (NEGQ (ADDQconst [c] y))) + // cond: c & 31 == 0 + // result: (SHLL x (NEGQ y)) for { - if v_0.Op != OpAMD64VPERMW256 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) - // result: (VPMOVSWBMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSWB128_256 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) - // result: (VPMOVSWBMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSWB256 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SHLL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) - // result: (VPMOVUSWBMasked128_256 x mask) + // match: (SHLL x (ANDQconst [c] y)) + // cond: c & 
31 == 31 + // result: (SHLL x y) for { - if v_0.Op != OpAMD64VPMOVUSWB128_256 { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) - // result: (VPMOVUSWBMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSWB256 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked256 (VPSHLDW256 [a] x y) mask) - // result: (VPSHLDWMasked256 [a] x y mask) + // match: (SHLL x (NEGQ (ANDQconst [c] y))) + // cond: c & 31 == 31 + // result: (SHLL x (NEGQ y)) for { - if v_0.Op != OpAMD64VPSHLDW256 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSLLW256 x y) mask) - // result: (VPSLLWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSLLW256 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSHRDW256 [a] x y) mask) - // result: (VPSHRDWMasked256 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHRDW256 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHLL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked256 
(VPSRAW256 x y) mask) - // result: (VPSRAWMasked256 x y mask) + // match: (SHLL x (ADDLconst [c] y)) + // cond: c & 31 == 0 + // result: (SHLL x y) for { - if v_0.Op != OpAMD64VPSRAW256 { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSRLW256 x y) mask) - // result: (VPSRLWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSRLW256 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked256 (VPSHLDVW256 x y z) mask) - // result: (VPSHLDVWMasked256 x y z mask) + // match: (SHLL x (NEGL (ADDLconst [c] y))) + // cond: c & 31 == 0 + // result: (SHLL x (NEGL y)) for { - if v_0.Op != OpAMD64VPSHLDVW256 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVWMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSLLVW256 x y) mask) - // result: (VPSLLVWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSLLVW256 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSHRDVW256 x y z) mask) - // result: (VPSHRDVWMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VPSHRDVW256 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVWMasked256) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64SHLL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + 
v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked256 (VPSRAVW256 x y) mask) - // result: (VPSRAVWMasked256 x y mask) + // match: (SHLL x (ANDLconst [c] y)) + // cond: c & 31 == 31 + // result: (SHLL x y) for { - if v_0.Op != OpAMD64VPSRAVW256 { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSRLVW256 x y) mask) - // result: (VPSRLVWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSRLVW256 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVWMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked256 (VPSUBW256 x y) mask) - // result: (VPSUBWMasked256 x y mask) + // match: (SHLL x (NEGL (ANDLconst [c] y))) + // cond: c & 31 == 31 + // result: (SHLL x (NEGL y)) for { - if v_0.Op != OpAMD64VPSUBW256 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSUBSW256 x y) mask) - // result: (VPSUBSWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSUBSW256 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBSWMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSUBUSW256 x y) mask) - // result: (VPSUBUSWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSUBUSW256 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBUSWMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHLL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + 
v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) - // result: (VPMOVWBMasked128_256 x mask) + // match: (SHLL l:(MOVLload [off] {sym} ptr mem) x) + // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) + // result: (SHLXLload [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64VPMOVWB128_256 { + l := v_0 + if l.Op != OpAMD64MOVLload { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVWBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask) - // result: (VPMOVWBMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVWB256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVWBMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SHLXLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (VMOVDQU16Masked256 (VPSHUFHW256 [a] x) mask) - // result: (VPSHUFHWMasked256 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64SHLLconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SHLLconst x [0]) + // result: x for { - if v_0.Op != OpAMD64VPSHUFHW256 { + if auxIntToInt8(v.AuxInt) != 0 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFHWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU16Masked256 (VPSHUFLW256 [a] x) mask) - // result: (VPSHUFLWMasked256 [a] x mask) + // match: (SHLLconst [1] x) + // result: (ADDL x x) for { - if v_0.Op != OpAMD64VPSHUFLW256 { + if auxIntToInt8(v.AuxInt) != 1 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFLWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + x := v_0 + v.reset(OpAMD64ADDL) 
+ v.AddArg2(x, x) return true } - // match: (VMOVDQU16Masked256 (VPSLLW256const [a] x) mask) - // result: (VPSLLWMasked256const [a] x mask) + // match: (SHLLconst [c] (ADDL x x)) + // cond: c < 31 + // result: (SHLLconst [c+1] x) for { - if v_0.Op != OpAMD64VPSLLW256const { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDL { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) - // result: (VPSRLWMasked256const [a] x mask) - for { - if v_0.Op != OpAMD64VPSRLW256const { + x := v_0.Args[1] + if x != v_0.Args[0] || !(c < 31) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(c + 1) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) - // result: (VPSRAWMasked256const [a] x mask) + // match: (SHLLconst [d] (MOVLconst [c])) + // result: (MOVLconst [c << uint64(d)]) for { - if v_0.Op != OpAMD64VPSRAW256const { + d := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + c := auxIntToInt32(v_0.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(c << uint64(d)) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64SHLQ(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU16Masked512 (VPABSW512 x) mask) - // result: (VPABSWMasked512 x mask) + b := v.Block + // match: (SHLQ x (MOVQconst [c])) + // result: (SHLQconst [int8(c&63)] x) for { - if v_0.Op != OpAMD64VPABSW512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst 
{ break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSWMasked512) - v.AddArg2(x, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPADDW512 x y) mask) - // result: (VPADDWMasked512 x y mask) + // match: (SHLQ x (MOVLconst [c])) + // result: (SHLQconst [int8(c&63)] x) for { - if v_0.Op != OpAMD64VPADDW512 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPADDSW512 x y) mask) - // result: (VPADDSWMasked512 x y mask) + // match: (SHLQ x (ADDQconst [c] y)) + // cond: c & 63 == 0 + // result: (SHLQ x y) for { - if v_0.Op != OpAMD64VPADDSW512 { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDSWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPADDUSW512 x y) mask) - // result: (VPADDUSWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPADDUSW512 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDUSWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPAVGW512 x y) mask) - // result: (VPAVGWMasked512 x y mask) + // match: (SHLQ x (NEGQ (ADDQconst [c] y))) + // cond: c & 63 == 0 + // result: (SHLQ x (NEGQ y)) for { - if v_0.Op != OpAMD64VPAVGW512 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPAVGWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) - // 
result: (VPBROADCASTWMasked512 x mask) - for { - if v_0.Op != OpAMD64VPBROADCASTW512 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPERMI2W512 x y z) mask) - // result: (VPERMI2WMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2W512 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2WMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64SHLQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) - // result: (VPMADDWDMasked512 x y mask) + // match: (SHLQ x (ANDQconst [c] y)) + // cond: c & 63 == 63 + // result: (SHLQ x y) for { - if v_0.Op != OpAMD64VPMADDWD512 { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDWDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) - // result: (VPMADDUBSWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMADDUBSW512 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDUBSWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) - // result: (VPMOVSXWDMasked512 x mask) + // match: (SHLQ x (NEGQ (ANDQconst [c] y))) + // cond: c & 63 == 63 + // result: (SHLQ x (NEGQ y)) for { - if v_0.Op != OpAMD64VPMOVSXWD512 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWDMasked512) - v.AddArg2(x, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op 
!= OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) - // result: (VPMOVSXWQMasked512 x mask) + // match: (SHLQ x (ADDLconst [c] y)) + // cond: c & 63 == 0 + // result: (SHLQ x y) for { - if v_0.Op != OpAMD64VPMOVSXWQ512 { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWQMasked512) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) - // result: (VPMOVZXWDMasked512 x mask) + // match: (SHLQ x (NEGL (ADDLconst [c] y))) + // cond: c & 63 == 0 + // result: (SHLQ x (NEGL y)) for { - if v_0.Op != OpAMD64VPMOVZXWD512 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWDMasked512) - v.AddArg2(x, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHLQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) - // result: (VPMOVZXWQMasked512 x mask) + // match: (SHLQ x (ANDLconst [c] y)) + // cond: c & 63 == 63 + // result: (SHLQ x y) for { - if v_0.Op != OpAMD64VPMOVZXWQ512 { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWQMasked512) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPMAXSW512 x y) mask) - // result: 
(VPMAXSWMasked512 x y mask) + // match: (SHLQ x (NEGL (ANDLconst [c] y))) + // cond: c & 63 == 63 + // result: (SHLQ x (NEGL y)) for { - if v_0.Op != OpAMD64VPMAXSW512 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSWMasked512) - v.AddArg3(x, y, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHLQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPMAXUW512 x y) mask) - // result: (VPMAXUWMasked512 x y mask) + // match: (SHLQ l:(MOVQload [off] {sym} ptr mem) x) + // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) + // result: (SHLXQload [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64VPMAXUW512 { + l := v_0 + if l.Op != OpAMD64MOVQload { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUWMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64SHLXQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (VMOVDQU16Masked512 (VPMINSW512 x y) mask) - // result: (VPMINSWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHLQconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SHLQconst x [0]) + // result: x for { - if v_0.Op != OpAMD64VPMINSW512 { + if auxIntToInt8(v.AuxInt) != 0 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSWMasked512) - v.AddArg3(x, y, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU16Masked512 (VPMINUW512 x y) mask) - // result: (VPMINUWMasked512 x y mask) + // match: (SHLQconst [1] x) + // 
result: (ADDQ x x) for { - if v_0.Op != OpAMD64VPMINUW512 { + if auxIntToInt8(v.AuxInt) != 1 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUWMasked512) - v.AddArg3(x, y, mask) + x := v_0 + v.reset(OpAMD64ADDQ) + v.AddArg2(x, x) return true } - // match: (VMOVDQU16Masked512 (VPMULHW512 x y) mask) - // result: (VPMULHWMasked512 x y mask) + // match: (SHLQconst [c] (ADDQ x x)) + // cond: c < 63 + // result: (SHLQconst [c+1] x) for { - if v_0.Op != OpAMD64VPMULHW512 { + c := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64ADDQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHWMasked512) - v.AddArg3(x, y, mask) + x := v_0.Args[1] + if x != v_0.Args[0] || !(c < 63) { + break + } + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(c + 1) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPMULHUW512 x y) mask) - // result: (VPMULHUWMasked512 x y mask) + // match: (SHLQconst [d] (MOVQconst [c])) + // result: (MOVQconst [c << uint64(d)]) for { - if v_0.Op != OpAMD64VPMULHUW512 { + d := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHUWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(c << uint64(d)) return true } - // match: (VMOVDQU16Masked512 (VPMULLW512 x y) mask) - // result: (VPMULLWMasked512 x y mask) + // match: (SHLQconst [d] (MOVLconst [c])) + // result: (MOVQconst [int64(c) << uint64(d)]) for { - if v_0.Op != OpAMD64VPMULLW512 { + d := auxIntToInt8(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(c) << uint64(d)) return true } - // match: (VMOVDQU16Masked512 (VPOPCNTW512 x) mask) - // result: (VPOPCNTWMasked512 x 
mask) + return false +} +func rewriteValueAMD64_OpAMD64SHLXLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHLXLload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHLLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) for { - if v_0.Op != OpAMD64VPOPCNTW512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHLLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) return true } - // match: (VMOVDQU16Masked512 (VPERMW512 x y) mask) - // result: (VPERMWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHLXQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHLXQload [off] {sym} ptr (MOVQconst [c]) mem) + // result: (SHLQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) for { - if v_0.Op != OpAMD64VPERMW512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) return true } - // match: (VMOVDQU16Masked512 (VPSHLDW512 [a] x y) mask) - // result: (VPSHLDWMasked512 [a] x y mask) + // match: (SHLXQload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHLQconst [int8(c&63)] (MOVQload [off] 
{sym} ptr mem)) for { - if v_0.Op != OpAMD64VPSHLDW512 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHLQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) return true } - // match: (VMOVDQU16Masked512 (VPSLLW512 x y) mask) - // result: (VPSLLWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SHRB x (MOVQconst [c])) + // cond: c&31 < 8 + // result: (SHRBconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPSLLW512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + if !(c&31 < 8) { + break + } + v.reset(OpAMD64SHRBconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPSHRDW512 [a] x y) mask) - // result: (VPSHRDWMasked512 [a] x y mask) + // match: (SHRB x (MOVLconst [c])) + // cond: c&31 < 8 + // result: (SHRBconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPSHRDW512 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + if !(c&31 < 8) { + break + } + v.reset(OpAMD64SHRBconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPSRAW512 x y) mask) - // result: (VPSRAWMasked512 
x y mask) + // match: (SHRB _ (MOVQconst [c])) + // cond: c&31 >= 8 + // result: (MOVLconst [0]) for { - if v_0.Op != OpAMD64VPSRAW512 { + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + if !(c&31 >= 8) { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (VMOVDQU16Masked512 (VPSRLW512 x y) mask) - // result: (VPSRLWMasked512 x y mask) + // match: (SHRB _ (MOVLconst [c])) + // cond: c&31 >= 8 + // result: (MOVLconst [0]) for { - if v_0.Op != OpAMD64VPSRLW512 { + if v_1.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + if !(c&31 >= 8) { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (VMOVDQU16Masked512 (VPSHLDVW512 x y z) mask) - // result: (VPSHLDVWMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRBconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SHRBconst x [0]) + // result: x for { - if v_0.Op != OpAMD64VPSHLDVW512 { + if auxIntToInt8(v.AuxInt) != 0 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVWMasked512) - v.AddArg4(x, y, z, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU16Masked512 (VPSLLVW512 x y) mask) - // result: (VPSLLVWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHRL x (MOVQconst [c])) + // result: (SHRLconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPSLLVW512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHRLconst) 
+ v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPSHRDVW512 x y z) mask) - // result: (VPSHRDVWMasked512 x y z mask) + // match: (SHRL x (MOVLconst [c])) + // result: (SHRLconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPSHRDVW512 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVWMasked512) - v.AddArg4(x, y, z, mask) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHRLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU16Masked512 (VPSRAVW512 x y) mask) - // result: (VPSRAVWMasked512 x y mask) + // match: (SHRL x (ADDQconst [c] y)) + // cond: c & 31 == 0 + // result: (SHRL x y) for { - if v_0.Op != OpAMD64VPSRAVW512 { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSRLVW512 x y) mask) - // result: (VPSRLVWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRLVW512 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPSUBW512 x y) mask) - // result: (VPSUBWMasked512 x y mask) + // match: (SHRL x (NEGQ (ADDQconst [c] y))) + // cond: c & 31 == 0 + // result: (SHRL x (NEGQ y)) for { - if v_0.Op != OpAMD64VPSUBW512 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBWMasked512) - v.AddArg3(x, y, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRL) + v0 := 
b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPSUBSW512 x y) mask) - // result: (VPSUBSWMasked512 x y mask) + // match: (SHRL x (ANDQconst [c] y)) + // cond: c & 31 == 31 + // result: (SHRL x y) for { - if v_0.Op != OpAMD64VPSUBSW512 { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBSWMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) - // result: (VPSUBUSWMasked512 x y mask) + // match: (SHRL x (NEGQ (ANDQconst [c] y))) + // cond: c & 31 == 31 + // result: (SHRL x (NEGQ y)) for { - if v_0.Op != OpAMD64VPSUBUSW512 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBUSWMasked512) - v.AddArg3(x, y, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) - // result: (VPSHUFHWMasked512 [a] x mask) + // match: (SHRL x (ADDLconst [c] y)) + // cond: c & 31 == 0 + // result: (SHRL x y) for { - if v_0.Op != OpAMD64VPSHUFHW512 { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFHWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPSHUFLW512 [a] x) mask) - // result: (VPSHUFLWMasked512 [a] x mask) + // match: (SHRL x 
(NEGL (ADDLconst [c] y))) + // cond: c & 31 == 0 + // result: (SHRL x (NEGL y)) for { - if v_0.Op != OpAMD64VPSHUFLW512 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFLWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 0) { + break + } + v.reset(OpAMD64SHRL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPSLLW512const [a] x) mask) - // result: (VPSLLWMasked512const [a] x mask) + // match: (SHRL x (ANDLconst [c] y)) + // cond: c & 31 == 31 + // result: (SHRL x y) for { - if v_0.Op != OpAMD64VPSLLW512const { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } - // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) - // result: (VPSRLWMasked512const [a] x mask) + // match: (SHRL x (NEGL (ANDLconst [c] y))) + // cond: c & 31 == 31 + // result: (SHRL x (NEGL y)) for { - if v_0.Op != OpAMD64VPSRLW512const { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { + break + } + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&31 == 31) { + break + } + v.reset(OpAMD64SHRL) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU16Masked512 (VPSRAW512const 
[a] x) mask) - // result: (VPSRAWMasked512const [a] x mask) + // match: (SHRL l:(MOVLload [off] {sym} ptr mem) x) + // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) + // result: (SHRXLload [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64VPSRAW512const { + l := v_0 + if l.Op != OpAMD64MOVLload { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64SHRXLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64SHRLconst(v *Value) bool { v_0 := v.Args[0] - // match: (VMOVDQU32Masked128 (VPABSD128 x) mask) - // result: (VPABSDMasked128 x mask) + // match: (SHRLconst [1] (ADDL x x)) + // result: (ANDLconst [0x7fffffff] x) for { - if v_0.Op != OpAMD64VPABSD128 { + if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDL { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSDMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VADDPS128 x y) mask) - // result: (VADDPSMasked128 x y mask) - for { - if v_0.Op != OpAMD64VADDPS128 { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPSMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64ANDLconst) + v.AuxInt = int32ToAuxInt(0x7fffffff) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPADDD128 x y) mask) - // result: (VPADDDMasked128 x y mask) + // match: (SHRLconst x [0]) + // result: x for { - if v_0.Op != OpAMD64VPADDD128 { + if auxIntToInt8(v.AuxInt) != 0 { break } - y := v_0.Args[1] - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDDMasked128) - v.AddArg3(x, y, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) - // result: (VBROADCASTSSMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SHRQ x (MOVQconst [c])) + // result: (SHRQconst [int8(c&63)] x) for { - if v_0.Op != OpAMD64VBROADCASTSS128 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked128) - v.AddArg2(x, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) - // result: (VPBROADCASTDMasked128 x mask) + // match: (SHRQ x (MOVLconst [c])) + // result: (SHRQconst [int8(c&63)] x) for { - if v_0.Op != OpAMD64VPBROADCASTD128 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked128) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) - // result: (VRNDSCALEPSMasked128 [a] x mask) + // match: (SHRQ x (ADDQconst [c] y)) + // cond: c & 63 == 0 + // result: (SHRQ x y) for { - if v_0.Op != OpAMD64VRNDSCALEPS128 { + x := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VREDUCEPS128 [a] x) mask) - // result: (VREDUCEPSMasked128 [a] x mask) - for { - if v_0.Op != OpAMD64VREDUCEPS128 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - 
mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU32Masked128 (VPERMI2PS128 x y z) mask) - // result: (VPERMI2PSMasked128 x y z mask) + // match: (SHRQ x (NEGQ (ADDQconst [c] y))) + // cond: c & 63 == 0 + // result: (SHRQ x (NEGQ y)) for { - if v_0.Op != OpAMD64VPERMI2PS128 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PSMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPERMI2D128 x y z) mask) - // result: (VPERMI2DMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2D128 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2DMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked128 (VCVTDQ2PS128 x) mask) - // result: (VCVTDQ2PSMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTDQ2PS128 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTDQ2PSMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64SHRQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked128 (VCVTUDQ2PS128 x) mask) - // result: (VCVTUDQ2PSMasked128 x mask) + // match: (SHRQ x (ANDQconst [c] y)) + // cond: c & 63 == 63 + // result: (SHRQ x y) for { - if v_0.Op != OpAMD64VCVTUDQ2PS128 { + x := v_0 + if v_1.Op != OpAMD64ANDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUDQ2PSMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VCVTTPS2DQ128 x) mask) - // result: (VCVTTPS2DQMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTTPS2DQ128 { + c := auxIntToInt32(v_1.AuxInt) + y := 
v_1.Args[0] + if !(c&63 == 63) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU32Masked128 (VCVTTPS2UDQ128 x) mask) - // result: (VCVTTPS2UDQMasked128 x mask) + // match: (SHRQ x (NEGQ (ANDQconst [c] y))) + // cond: c & 63 == 63 + // result: (SHRQ x (NEGQ y)) for { - if v_0.Op != OpAMD64VCVTTPS2UDQ128 { + x := v_0 + if v_1.Op != OpAMD64NEGQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2UDQMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VDIVPS128 x y) mask) - // result: (VDIVPSMasked128 x y mask) - for { - if v_0.Op != OpAMD64VDIVPS128 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VDIVPSMasked128) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { + break + } + v.reset(OpAMD64SHRQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) - // result: (VPMOVSXDQMasked128 x mask) + // match: (SHRQ x (ADDLconst [c] y)) + // cond: c & 63 == 0 + // result: (SHRQ x y) for { - if v_0.Op != OpAMD64VPMOVSXDQ128 { + x := v_0 + if v_1.Op != OpAMD64ADDLconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked128) - v.AddArg2(x, mask) + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 0) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU32Masked128 (VPMOVZXDQ128 x) mask) - // result: (VPMOVZXDQMasked128 x mask) + // match: (SHRQ x (NEGL (ADDLconst [c] y))) + // cond: c & 63 == 0 + // result: (SHRQ x (NEGL y)) for { - if v_0.Op != OpAMD64VPMOVZXDQ128 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked128) - 
v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPLZCNTD128 x) mask) - // result: (VPLZCNTDMasked128 x mask) - for { - if v_0.Op != OpAMD64VPLZCNTD128 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ADDLconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTDMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VMAXPS128 x y) mask) - // result: (VMAXPSMasked128 x y mask) - for { - if v_0.Op != OpAMD64VMAXPS128 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMAXPSMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHRQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked128 (VPMAXSD128 x y) mask) - // result: (VPMAXSDMasked128 x y mask) + // match: (SHRQ x (ANDLconst [c] y)) + // cond: c & 63 == 63 + // result: (SHRQ x y) for { - if v_0.Op != OpAMD64VPMAXSD128 { + x := v_0 + if v_1.Op != OpAMD64ANDLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMAXUD128 x y) mask) - // result: (VPMAXUDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMAXUD128 { + c := auxIntToInt32(v_1.AuxInt) + y := v_1.Args[0] + if !(c&63 == 63) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } - // match: (VMOVDQU32Masked128 (VMINPS128 x y) mask) - // result: (VMINPSMasked128 x y mask) + // match: (SHRQ x (NEGL (ANDLconst [c] y))) + // cond: c & 63 == 63 + // result: (SHRQ x (NEGL y)) for { - if v_0.Op != OpAMD64VMINPS128 { + x := v_0 + if v_1.Op != OpAMD64NEGL { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPSMasked128) - v.AddArg3(x, y, mask) - 
return true - } - // match: (VMOVDQU32Masked128 (VPMINSD128 x y) mask) - // result: (VPMINSDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMINSD128 { + t := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64ANDLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMINUD128 x y) mask) - // result: (VPMINUDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMINUD128 { + c := auxIntToInt32(v_1_0.AuxInt) + y := v_1_0.Args[0] + if !(c&63 == 63) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHRQ) + v0 := b.NewValue0(v.Pos, OpAMD64NEGL, t) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked128 (VFMADD213PS128 x y z) mask) - // result: (VFMADD213PSMasked128 x y z mask) + // match: (SHRQ l:(MOVQload [off] {sym} ptr mem) x) + // cond: buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) + // result: (SHRXQload [off] {sym} ptr x mem) for { - if v_0.Op != OpAMD64VFMADD213PS128 { + l := v_0 + if l.Op != OpAMD64MOVQload { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADD213PSMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked128 (VFMADDSUB213PS128 x y z) mask) - // result: (VFMADDSUB213PSMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VFMADDSUB213PS128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + x := v_1 + if !(buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PSMasked128) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64SHRXQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, x, mem) return true } - // match: (VMOVDQU32Masked128 (VMULPS128 x 
y) mask) - // result: (VMULPSMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRQconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SHRQconst [1] (ADDQ x x)) + // result: (BTRQconst [63] x) for { - if v_0.Op != OpAMD64VMULPS128 { + if auxIntToInt8(v.AuxInt) != 1 || v_0.Op != OpAMD64ADDQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPSMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMULLD128 x y) mask) - // result: (VPMULLDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMULLD128 { + x := v_0.Args[1] + if x != v_0.Args[0] { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64BTRQconst) + v.AuxInt = int8ToAuxInt(63) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VFMSUBADD213PS128 x y z) mask) - // result: (VFMSUBADD213PSMasked128 x y z mask) + // match: (SHRQconst x [0]) + // result: x for { - if v_0.Op != OpAMD64VFMSUBADD213PS128 { + if auxIntToInt8(v.AuxInt) != 0 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMSUBADD213PSMasked128) - v.AddArg4(x, y, z, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU32Masked128 (VPOPCNTD128 x) mask) - // result: (VPOPCNTDMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SHRW x (MOVQconst [c])) + // cond: c&31 < 16 + // result: (SHRWconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPOPCNTD128 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPROLD128 [a] x) mask) - // result: (VPROLDMasked128 [a] x mask) - for { - if v_0.Op != OpAMD64VPROLD128 { + c := auxIntToInt64(v_1.AuxInt) + if !(c&31 < 16) { break } - a := 
auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64SHRWconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPRORD128 [a] x) mask) - // result: (VPRORDMasked128 [a] x mask) + // match: (SHRW x (MOVLconst [c])) + // cond: c&31 < 16 + // result: (SHRWconst [int8(c&31)] x) for { - if v_0.Op != OpAMD64VPRORD128 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPROLVD128 x y) mask) - // result: (VPROLVDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPROLVD128 { + c := auxIntToInt32(v_1.AuxInt) + if !(c&31 < 16) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SHRWconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPRORVD128 x y) mask) - // result: (VPRORVDMasked128 x y mask) + // match: (SHRW _ (MOVQconst [c])) + // cond: c&31 >= 16 + // result: (MOVLconst [0]) for { - if v_0.Op != OpAMD64VPRORVD128 { + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) - // result: (VPMOVSDBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDB128_128 { + c := auxIntToInt64(v_1.AuxInt) + if !(c&31 >= 16) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) - // result: (VPACKSSDWMasked128 x y mask) + // match: (SHRW _ 
(MOVLconst [c])) + // cond: c&31 >= 16 + // result: (MOVLconst [0]) for { - if v_0.Op != OpAMD64VPACKSSDW128 { + if v_1.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) - // result: (VPMOVSDWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDW128_128 { + c := auxIntToInt32(v_1.AuxInt) + if !(c&31 >= 16) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) - // result: (VPMOVUSDBMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SHRWconst x [0]) + // result: x for { - if v_0.Op != OpAMD64VPMOVUSDB128_128 { + if auxIntToInt8(v.AuxInt) != 0 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128_128) - v.AddArg2(x, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) - // result: (VPACKUSDWMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRXLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHRXLload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHRLconst [int8(c&31)] (MOVLload [off] {sym} ptr mem)) for { - if v_0.Op != OpAMD64VPACKUSDW128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked128) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHRLconst) + v.AuxInt = int8ToAuxInt(int8(c & 31)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = 
int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) return true } - // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) - // result: (VPMOVUSDWMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SHRXQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SHRXQload [off] {sym} ptr (MOVQconst [c]) mem) + // result: (SHRQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) for { - if v_0.Op != OpAMD64VPMOVUSDW128_128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked128_128) - v.AddArg2(x, mask) + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) return true } - // match: (VMOVDQU32Masked128 (VSCALEFPS128 x y) mask) - // result: (VSCALEFPSMasked128 x y mask) + // match: (SHRXQload [off] {sym} ptr (MOVLconst [c]) mem) + // result: (SHRQconst [int8(c&63)] (MOVQload [off] {sym} ptr mem)) for { - if v_0.Op != OpAMD64VSCALEFPS128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPSMasked128) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + mem := v_2 + v.reset(OpAMD64SHRQconst) + v.AuxInt = int8ToAuxInt(int8(c & 63)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg(v0) return true } - // match: (VMOVDQU32Masked128 (VPSHLDD128 [a] x y) mask) - // result: (VPSHLDDMasked128 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBL(v *Value) bool { + 
v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SUBL x (MOVLconst [c])) + // result: (SUBLconst x [c]) for { - if v_0.Op != OpAMD64VPSHLDD128 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64SUBLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPSLLD128 x y) mask) - // result: (VPSLLDMasked128 x y mask) + // match: (SUBL (MOVLconst [c]) x) + // result: (NEGL (SUBLconst x [c])) for { - if v_0.Op != OpAMD64VPSLLD128 { + if v_0.Op != OpAMD64MOVLconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked128) - v.AddArg3(x, y, mask) + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64NEGL) + v0 := b.NewValue0(v.Pos, OpAMD64SUBLconst, v.Type) + v0.AuxInt = int32ToAuxInt(c) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (VMOVDQU32Masked128 (VPSHRDD128 [a] x y) mask) - // result: (VPSHRDDMasked128 [a] x y mask) + // match: (SUBL x x) + // result: (MOVLconst [0]) for { - if v_0.Op != OpAMD64VPSHRDD128 { + x := v_0 + if x != v_1 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (VMOVDQU32Masked128 (VPSRAD128 x y) mask) - // result: (VPSRADMasked128 x y mask) + // match: (SUBL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (SUBLload x [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64VPSRAD128 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVLload { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked128) - v.AddArg3(x, y, mask) 
+ off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + break + } + v.reset(OpAMD64SUBLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked128 (VPSRLD128 x y) mask) - // result: (VPSRLDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBLconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SUBLconst [0] x) + // result: x for { - if v_0.Op != OpAMD64VPSRLD128 { + if auxIntToInt32(v.AuxInt) != 0 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked128) - v.AddArg3(x, y, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU32Masked128 (VPSHLDVD128 x y z) mask) - // result: (VPSHLDVDMasked128 x y z mask) + // match: (SUBLconst [c] x) + // result: (ADDLconst [-c] x) for { - if v_0.Op != OpAMD64VPSHLDVD128 { - break - } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVDMasked128) - v.AddArg4(x, y, z, mask) + c := auxIntToInt32(v.AuxInt) + x := v_0 + v.reset(OpAMD64ADDLconst) + v.AuxInt = int32ToAuxInt(-c) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPSLLVD128 x y) mask) - // result: (VPSLLVDMasked128 x y mask) +} +func rewriteValueAMD64_OpAMD64SUBLload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SUBLload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SUBLload [off1+off2] {sym} val base mem) for { - if v_0.Op != OpAMD64VPSLLVD128 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPSHRDVD128 x y z) mask) - 
// result: (VPSHRDVDMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPSHRDVD128 { + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVDMasked128) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64SUBLload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked128 (VPSRAVD128 x y) mask) - // result: (VPSRAVDMasked128 x y mask) + // match: (SUBLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SUBLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - if v_0.Op != OpAMD64VPSRAVD128 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPSRLVD128 x y) mask) - // result: (VPSRLVDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPSRLVD128 { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SUBLload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked128 (VSQRTPS128 x) mask) - // result: (VSQRTPSMasked128 x mask) + // match: (SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) + // result: (SUBL x (MOVLf2i y)) for { - if v_0.Op != OpAMD64VSQRTPS128 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVSSstore 
|| auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked128 (VSUBPS128 x y) mask) - // result: (VSUBPSMasked128 x y mask) - for { - if v_0.Op != OpAMD64VSUBPS128 { + y := v_2.Args[1] + if ptr != v_2.Args[0] { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPSMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SUBL) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked128 (VPSUBD128 x y) mask) - // result: (VPSUBDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBLmodify(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SUBLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SUBLmodify [off1+off2] {sym} base val mem) for { - if v_0.Op != OpAMD64VPSUBD128 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) - // result: (VPMOVDBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVDB128_128 { + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64SUBLmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) - // result: (VPMOVDWMasked128_128 x mask) + // match: (SUBLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SUBLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - if v_0.Op != OpAMD64VPMOVDW128_128 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDWMasked128_128) - v.AddArg2(x, mask) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64SUBLmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (VMOVDQU32Masked128 (VPSHUFD128 [a] x) mask) - // result: (VPSHUFDMasked128 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SUBQ x (MOVQconst [c])) + // cond: is32Bit(c) + // result: (SUBQconst x [int32(c)]) for { - if v_0.Op != OpAMD64VPSHUFD128 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c)) { + break + } + v.reset(OpAMD64SUBQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked128 (VPSLLD128const [a] x) mask) - // result: (VPSLLDMasked128const [a] x mask) + // match: (SUBQ (MOVQconst [c]) x) + // cond: is32Bit(c) + // result: (NEGQ (SUBQconst x [int32(c)])) for { - if v_0.Op != OpAMD64VPSLLD128const { + if v_0.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + c := auxIntToInt64(v_0.AuxInt) + x := v_1 + if !(is32Bit(c)) { + break + } + v.reset(OpAMD64NEGQ) + v0 
:= b.NewValue0(v.Pos, OpAMD64SUBQconst, v.Type) + v0.AuxInt = int32ToAuxInt(int32(c)) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) - // result: (VPSRLDMasked128const [a] x mask) + // match: (SUBQ x x) + // result: (MOVLconst [0]) for { - if v_0.Op != OpAMD64VPSRLD128const { + x := v_0 + if x != v_1 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask) - // result: (VPSRADMasked128const [a] x mask) + // match: (SUBQ x l:(MOVQload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (SUBQload x [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64VPSRAD128const { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVQload { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + break + } + v.reset(OpAMD64SUBQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64SUBQborrow(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU32Masked256 (VPABSD256 x) mask) - // result: (VPABSDMasked256 x mask) + // match: (SUBQborrow x (MOVQconst [c])) + // cond: is32Bit(c) + // result: (SUBQconstborrow x [int32(c)]) for { - if v_0.Op != OpAMD64VPABSD256 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSDMasked256) - v.AddArg2(x, mask) - return true - } - // match: 
(VMOVDQU32Masked256 (VADDPS256 x y) mask) - // result: (VADDPSMasked256 x y mask) - for { - if v_0.Op != OpAMD64VADDPS256 { + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPSMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64SUBQconstborrow) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked256 (VPADDD256 x y) mask) - // result: (VPADDDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBQconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SUBQconst [0] x) + // result: x for { - if v_0.Op != OpAMD64VPADDD256 { + if auxIntToInt32(v.AuxInt) != 0 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDDMasked256) - v.AddArg3(x, y, mask) + x := v_0 + v.copyOf(x) return true } - // match: (VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) - // result: (VBROADCASTSSMasked256 x mask) + // match: (SUBQconst [c] x) + // cond: c != -(1<<31) + // result: (ADDQconst [-c] x) for { - if v_0.Op != OpAMD64VBROADCASTSS256 { + c := auxIntToInt32(v.AuxInt) + x := v_0 + if !(c != -(1 << 31)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(-c) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) - // result: (VPBROADCASTDMasked256 x mask) + // match: (SUBQconst (MOVQconst [d]) [c]) + // result: (MOVQconst [d-int64(c)]) for { - if v_0.Op != OpAMD64VPBROADCASTD256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked256) - v.AddArg2(x, mask) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(d - int64(c)) return true } - // match: (VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) - // result: (VRNDSCALEPSMasked256 [a] x mask) + // match: (SUBQconst 
(SUBQconst x [d]) [c]) + // cond: is32Bit(int64(-c)-int64(d)) + // result: (ADDQconst [-c-d] x) for { - if v_0.Op != OpAMD64VRNDSCALEPS256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64SUBQconst { break } - a := auxIntToUint8(v_0.AuxInt) + d := auxIntToInt32(v_0.AuxInt) x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VREDUCEPS256 [a] x) mask) - // result: (VREDUCEPSMasked256 [a] x mask) - for { - if v_0.Op != OpAMD64VREDUCEPS256 { + if !(is32Bit(int64(-c) - int64(d))) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(-c - d) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked256 (VPERMI2PS256 x y z) mask) - // result: (VPERMI2PSMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SUBQload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SUBQload [off1+off2] {sym} val base mem) for { - if v_0.Op != OpAMD64VPERMI2PS256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PSMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPERMI2D256 x y z) mask) - // result: (VPERMI2DMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2D256 { + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2DMasked256) - 
v.AddArg4(x, y, z, mask) + v.reset(OpAMD64SUBQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked256 (VCVTDQ2PS256 x) mask) - // result: (VCVTDQ2PSMasked256 x mask) + // match: (SUBQload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SUBQload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - if v_0.Op != OpAMD64VCVTDQ2PS256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTDQ2PSMasked256) - v.AddArg2(x, mask) + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64SUBQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked256 (VCVTUDQ2PS256 x) mask) - // result: (VCVTUDQ2PSMasked256 x mask) + // match: (SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) + // result: (SUBQ x (MOVQf2i y)) for { - if v_0.Op != OpAMD64VCVTUDQ2PS256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVSDstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUDQ2PSMasked256) - v.AddArg2(x, mask) + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64SUBQ) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQf2i, typ.UInt64) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked256 (VCVTPS2PD256 x) mask) - // result: (VCVTPS2PDMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBQmodify(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(SUBQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SUBQmodify [off1+off2] {sym} base val mem) for { - if v_0.Op != OpAMD64VCVTPS2PD256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTPS2PDMasked256) - v.AddArg2(x, mask) + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64SUBQmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (VMOVDQU32Masked256 (VCVTDQ2PD256 x) mask) - // result: (VCVTDQ2PDMasked256 x mask) + // match: (SUBQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SUBQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - if v_0.Op != OpAMD64VCVTDQ2PD256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTDQ2PDMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VCVTUDQ2PD256 x) mask) - // result: (VCVTUDQ2PDMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTUDQ2PD256 { + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUDQ2PDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SUBQmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (VMOVDQU32Masked256 (VCVTTPS2DQ256 x) mask) - // result: (VCVTTPS2DQMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBSD(v *Value) bool { + v_1 := v.Args[1] 
+ v_0 := v.Args[0] + // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (SUBSDload x [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64VCVTTPS2DQ256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVSDload { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VCVTTPS2QQ256 x) mask) - // result: (VCVTTPS2QQMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTTPS2QQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2QQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SUBSDload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked256 (VCVTTPS2UDQ256 x) mask) - // result: (VCVTTPS2UDQMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBSDload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SUBSDload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SUBSDload [off1+off2] {sym} val base mem) for { - if v_0.Op != OpAMD64VCVTTPS2UDQ256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2UDQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VCVTTPS2UQQ256 x) mask) - // result: (VCVTTPS2UQQMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTTPS2UQQ256 { + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2UQQMasked256) - v.AddArg2(x, mask) + 
v.reset(OpAMD64SUBSDload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked256 (VDIVPS256 x y) mask) - // result: (VDIVPSMasked256 x y mask) + // match: (SUBSDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SUBSDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - if v_0.Op != OpAMD64VDIVPS256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VDIVPSMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) - // result: (VPMOVSXDQMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXDQ256 { + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SUBSDload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked256 (VPMOVZXDQ256 x) mask) - // result: (VPMOVZXDQMasked256 x mask) + // match: (SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) + // result: (SUBSD x (MOVQi2f y)) for { - if v_0.Op != OpAMD64VPMOVZXDQ256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVQstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPLZCNTD256 x) mask) - // result: (VPLZCNTDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPLZCNTD256 { + y := v_2.Args[1] + if ptr != 
v_2.Args[0] { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64SUBSD) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQi2f, typ.Float64) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked256 (VMAXPS256 x y) mask) - // result: (VMAXPSMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBSS(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (SUBSSload x [off] {sym} ptr mem) for { - if v_0.Op != OpAMD64VMAXPS256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVSSload { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMAXPSMasked256) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + break + } + v.reset(OpAMD64SUBSSload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked256 (VPMAXSD256 x y) mask) - // result: (VPMAXSDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64SUBSSload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SUBSSload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (SUBSSload [off1+off2] {sym} val base mem) for { - if v_0.Op != OpAMD64VPMAXSD256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSDMasked256) - v.AddArg3(x, y, mask) + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64SUBSSload) + v.AuxInt = int32ToAuxInt(off1 + 
off2) + v.Aux = symToAux(sym) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked256 (VPMAXUD256 x y) mask) - // result: (VPMAXUDMasked256 x y mask) + // match: (SUBSSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (SUBSSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - if v_0.Op != OpAMD64VPMAXUD256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUDMasked256) - v.AddArg3(x, y, mask) + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64SUBSSload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (VMOVDQU32Masked256 (VMINPS256 x y) mask) - // result: (VMINPSMasked256 x y mask) + // match: (SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) + // result: (SUBSS x (MOVLi2f y)) for { - if v_0.Op != OpAMD64VMINPS256 { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVLstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPSMasked256) - v.AddArg3(x, y, mask) + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64SUBSS) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLi2f, typ.Float32) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (VMOVDQU32Masked256 (VPMINSD256 x y) mask) - // result: (VPMINSDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (TESTB (MOVLconst [c]) x) + // result: (TESTBconst [int8(c)] 
x) for { - if v_0.Op != OpAMD64VPMINSD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64TESTBconst) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPMINUD256 x y) mask) - // result: (VPMINUDMasked256 x y mask) + // match: (TESTB l:(MOVBload {sym} [off] ptr mem) l2) + // cond: l == l2 && l.Uses == 2 && clobber(l) + // result: @l.Block (CMPBconstload {sym} [makeValAndOff(0, off)] ptr mem) for { - if v_0.Op != OpAMD64VPMINUD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + l := v_0 + if l.Op != OpAMD64MOVBload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + l2 := v_1 + if !(l == l2 && l.Uses == 2 && clobber(l)) { + continue + } + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPBconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VFMADD213PS256 x y z) mask) - // result: (VFMADD213PSMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTBconst(v *Value) bool { + v_0 := v.Args[0] + // match: (TESTBconst [-1] x) + // cond: x.Op != OpAMD64MOVLconst + // result: (TESTB x x) for { - if v_0.Op != OpAMD64VFMADD213PS256 { + if auxIntToInt8(v.AuxInt) != -1 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADD213PSMasked256) - v.AddArg4(x, y, z, mask) + x := v_0 + if !(x.Op != OpAMD64MOVLconst) { + break + } + v.reset(OpAMD64TESTB) 
+ v.AddArg2(x, x) return true } - // match: (VMOVDQU32Masked256 (VFMADDSUB213PS256 x y z) mask) - // result: (VFMADDSUB213PSMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (TESTL (MOVLconst [c]) x) + // result: (TESTLconst [c] x) for { - if v_0.Op != OpAMD64VFMADDSUB213PS256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64TESTLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PSMasked256) - v.AddArg4(x, y, z, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VMULPS256 x y) mask) - // result: (VMULPSMasked256 x y mask) + // match: (TESTL l:(MOVLload {sym} [off] ptr mem) l2) + // cond: l == l2 && l.Uses == 2 && clobber(l) + // result: @l.Block (CMPLconstload {sym} [makeValAndOff(0, off)] ptr mem) for { - if v_0.Op != OpAMD64VMULPS256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + l := v_0 + if l.Op != OpAMD64MOVLload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + l2 := v_1 + if !(l == l2 && l.Uses == 2 && clobber(l)) { + continue + } + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPLconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPSMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPMULLD256 x y) mask) - // result: (VPMULLDMasked256 x y mask) + // match: (TESTL a:(ANDLload [off] {sym} x ptr mem) a) + // cond: a.Uses == 2 && a.Block == v.Block && clobber(a) + // result: (TESTL 
(MOVLload [off] {sym} ptr mem) x) for { - if v_0.Op != OpAMD64VPMULLD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + a := v_0 + if a.Op != OpAMD64ANDLload { + continue + } + off := auxIntToInt32(a.AuxInt) + sym := auxToSym(a.Aux) + mem := a.Args[2] + x := a.Args[0] + ptr := a.Args[1] + if a != v_1 || !(a.Uses == 2 && a.Block == v.Block && clobber(a)) { + continue + } + v.reset(OpAMD64TESTL) + v0 := b.NewValue0(a.Pos, OpAMD64MOVLload, a.Type) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg2(v0, x) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VFMSUBADD213PS256 x y z) mask) - // result: (VFMSUBADD213PSMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTLconst(v *Value) bool { + v_0 := v.Args[0] + // match: (TESTLconst [c] (MOVLconst [c])) + // cond: c == 0 + // result: (FlagEQ) for { - if v_0.Op != OpAMD64VFMSUBADD213PS256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0.AuxInt) != c || !(c == 0) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMSUBADD213PSMasked256) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64FlagEQ) return true } - // match: (VMOVDQU32Masked256 (VPOPCNTD256 x) mask) - // result: (VPOPCNTDMasked256 x mask) + // match: (TESTLconst [c] (MOVLconst [c])) + // cond: c < 0 + // result: (FlagLT_UGT) for { - if v_0.Op != OpAMD64VPOPCNTD256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0.AuxInt) != c || !(c < 0) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64FlagLT_UGT) return true } - // match: (VMOVDQU32Masked256 (VPERMPS256 x y) mask) - // result: (VPERMPSMasked256 x y mask) + // match: (TESTLconst [c] (MOVLconst [c])) + // cond: c > 
0 + // result: (FlagGT_UGT) for { - if v_0.Op != OpAMD64VPERMPS256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0.AuxInt) != c || !(c > 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMPSMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64FlagGT_UGT) return true } - // match: (VMOVDQU32Masked256 (VPERMD256 x y) mask) - // result: (VPERMDMasked256 x y mask) + // match: (TESTLconst [-1] x) + // cond: x.Op != OpAMD64MOVLconst + // result: (TESTL x x) for { - if v_0.Op != OpAMD64VPERMD256 { + if auxIntToInt32(v.AuxInt) != -1 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPROLD256 [a] x) mask) - // result: (VPROLDMasked256 [a] x mask) - for { - if v_0.Op != OpAMD64VPROLD256 { + x := v_0 + if !(x.Op != OpAMD64MOVLconst) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64TESTL) + v.AddArg2(x, x) return true } - // match: (VMOVDQU32Masked256 (VPRORD256 [a] x) mask) - // result: (VPRORDMasked256 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (TESTQ (MOVQconst [c]) x) + // cond: is32Bit(c) + // result: (TESTQconst [int32(c)] x) for { - if v_0.Op != OpAMD64VPRORD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0.AuxInt) + x := v_1 + if !(is32Bit(c)) { + continue + } + v.reset(OpAMD64TESTQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true + break } - // match: 
(VMOVDQU32Masked256 (VPROLVD256 x y) mask) - // result: (VPROLVDMasked256 x y mask) + // match: (TESTQ l:(MOVQload {sym} [off] ptr mem) l2) + // cond: l == l2 && l.Uses == 2 && clobber(l) + // result: @l.Block (CMPQconstload {sym} [makeValAndOff(0, off)] ptr mem) for { - if v_0.Op != OpAMD64VPROLVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + l := v_0 + if l.Op != OpAMD64MOVQload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + l2 := v_1 + if !(l == l2 && l.Uses == 2 && clobber(l)) { + continue + } + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPQconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPRORVD256 x y) mask) - // result: (VPRORVDMasked256 x y mask) + // match: (TESTQ a:(ANDQload [off] {sym} x ptr mem) a) + // cond: a.Uses == 2 && a.Block == v.Block && clobber(a) + // result: (TESTQ (MOVQload [off] {sym} ptr mem) x) for { - if v_0.Op != OpAMD64VPRORVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + a := v_0 + if a.Op != OpAMD64ANDQload { + continue + } + off := auxIntToInt32(a.AuxInt) + sym := auxToSym(a.Aux) + mem := a.Args[2] + x := a.Args[0] + ptr := a.Args[1] + if a != v_1 || !(a.Uses == 2 && a.Block == v.Block && clobber(a)) { + continue + } + v.reset(OpAMD64TESTQ) + v0 := b.NewValue0(a.Pos, OpAMD64MOVQload, a.Type) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + v.AddArg2(v0, x) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) - // result: (VPMOVSDBMasked128_256 
x mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTQconst(v *Value) bool { + v_0 := v.Args[0] + // match: (TESTQconst [c] (MOVQconst [d])) + // cond: int64(c) == d && c == 0 + // result: (FlagEQ) for { - if v_0.Op != OpAMD64VPMOVSDB128_256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) - // result: (VPACKSSDWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPACKSSDW256 { + d := auxIntToInt64(v_0.AuxInt) + if !(int64(c) == d && c == 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64FlagEQ) return true } - // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) - // result: (VPMOVSDWMasked128_256 x mask) + // match: (TESTQconst [c] (MOVQconst [d])) + // cond: int64(c) == d && c < 0 + // result: (FlagLT_UGT) for { - if v_0.Op != OpAMD64VPMOVSDW128_256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) - // result: (VPMOVSDWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDW256 { + d := auxIntToInt64(v_0.AuxInt) + if !(int64(c) == d && c < 0) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64FlagLT_UGT) return true } - // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) - // result: (VPMOVUSDBMasked128_256 x mask) + // match: (TESTQconst [c] (MOVQconst [d])) + // cond: int64(c) == d && c > 0 + // result: (FlagGT_UGT) for { - if v_0.Op != OpAMD64VPMOVUSDB128_256 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128_256) - 
v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) - // result: (VPACKUSDWMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPACKUSDW256 { + d := auxIntToInt64(v_0.AuxInt) + if !(int64(c) == d && c > 0) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64FlagGT_UGT) return true } - // match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) - // result: (VPMOVUSDWMasked128_256 x mask) + // match: (TESTQconst [-1] x) + // cond: x.Op != OpAMD64MOVQconst + // result: (TESTQ x x) for { - if v_0.Op != OpAMD64VPMOVUSDW128_256 { + if auxIntToInt32(v.AuxInt) != -1 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) - // result: (VPMOVUSDWMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDW256 { + x := v_0 + if !(x.Op != OpAMD64MOVQconst) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64TESTQ) + v.AddArg2(x, x) return true } - // match: (VMOVDQU32Masked256 (VSCALEFPS256 x y) mask) - // result: (VSCALEFPSMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTW(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (TESTW (MOVLconst [c]) x) + // result: (TESTWconst [int16(c)] x) for { - if v_0.Op != OpAMD64VSCALEFPS256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_0.AuxInt) + x := v_1 + v.reset(OpAMD64TESTWconst) + v.AuxInt = int16ToAuxInt(int16(c)) + v.AddArg(x) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPSMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSHLDD256 [a] x y) mask) - // result: (VPSHLDDMasked256 [a] x y mask) 
+ // match: (TESTW l:(MOVWload {sym} [off] ptr mem) l2) + // cond: l == l2 && l.Uses == 2 && clobber(l) + // result: @l.Block (CMPWconstload {sym} [makeValAndOff(0, off)] ptr mem) for { - if v_0.Op != OpAMD64VPSHLDD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + l := v_0 + if l.Op != OpAMD64MOVWload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + l2 := v_1 + if !(l == l2 && l.Uses == 2 && clobber(l)) { + continue + } + b = l.Block + v0 := b.NewValue0(l.Pos, OpAMD64CMPWconstload, types.TypeFlags) + v.copyOf(v0) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, off)) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSLLD256 x y) mask) - // result: (VPSLLDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64TESTWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (TESTWconst [-1] x) + // cond: x.Op != OpAMD64MOVLconst + // result: (TESTW x x) for { - if v_0.Op != OpAMD64VPSLLD256 { + if auxIntToInt16(v.AuxInt) != -1 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPSHRDD256 [a] x y) mask) - // result: (VPSHRDDMasked256 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHRDD256 { + x := v_0 + if !(x.Op != OpAMD64MOVLconst) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64TESTW) + v.AddArg2(x, x) return true } - // match: (VMOVDQU32Masked256 (VPSRAD256 x y) mask) - // result: (VPSRADMasked256 x y mask) + return false +} +func 
rewriteValueAMD64_OpAMD64VADDPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSRAD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSRLD256 x y) mask) - // result: (VPSRLDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VADDPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSHLDVD256 x y z) mask) - // result: (VPSHLDVDMasked256 x y z mask) + return 
false +} +func rewriteValueAMD64_OpAMD64VADDPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSHLDVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVDMasked256) - v.AddArg4(x, y, z, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSLLVD256 x y) mask) - // result: (VPSLLVDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VADDPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSLLVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVDMasked256) - v.AddArg3(x, y, mask) - return true + 
break } - // match: (VMOVDQU32Masked256 (VPSHRDVD256 x y z) mask) - // result: (VPSHRDVDMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VADDPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPS512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSHRDVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVDMasked256) - v.AddArg4(x, y, z, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSRAVD256 x y) mask) - // result: (VPSRAVDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VADDPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPSMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPSRAVDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VPSRLVD256 x y) mask) - // result: (VPSRLVDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VADDPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPSMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLVD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVDMasked256) - v.AddArg3(x, y, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VSQRTPS256 x) mask) - // result: (VSQRTPSMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VADDPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPSMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VSQRTPS256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, 
ptr, mask, mem) + return true } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked256) - v.AddArg2(x, mask) - return true + break } - // match: (VMOVDQU32Masked256 (VSUBPS256 x y) mask) - // result: (VSUBPSMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VSUBPS256 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPSMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPSUBD256 x y) mask) - // result: (VPSUBDMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSUBD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBDMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCMPPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) - // result: (VPMOVDBMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVDB128_256 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := 
v_1 - v.reset(OpAMD64VPMOVDBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) - // result: (VPMOVDWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVDW128_256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDWMasked128_256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCMPPDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask) - // result: (VPMOVDWMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVDW256 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDWMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPSHUFD256 [a] x) mask) - // result: (VPSHUFDMasked256 [a] x mask) - for { - if v_0.Op != OpAMD64VPSHUFD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCMPPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: 
(VMOVDQU32Masked256 (VPSLLD256const [a] x) mask) - // result: (VPSLLDMasked256const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSLLD256const { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) - // result: (VPSRLDMasked256const [a] x mask) - for { - if v_0.Op != OpAMD64VPSRLD256const { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCMPPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask) - // result: (VPSRADMasked256const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VPSRAD256const { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := 
auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCMPPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU32Masked512 (VPABSD512 x) mask) - // result: (VPABSDMasked512 x mask) + // match: (VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPABSD512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VADDPS512 x y) mask) - // result: (VADDPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VADDPS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCMPPSMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPADDD512 x y) mask) - // result: (VPADDDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPADDD512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPANDD512 x y) mask) - // result: (VPANDDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPANDD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCMPPSMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPANDND512 x y) mask) - // result: (VPANDNDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPANDND512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDNDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) - // result: (VBROADCASTSSMasked512 x mask) - for { - if v_0.Op != OpAMD64VBROADCASTSS512 { + off := auxIntToInt32(l.AuxInt) + 
sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCMPPSMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) - // result: (VPBROADCASTDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTDQ2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PD512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPBROADCASTD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTDQ2PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) - // result: (VRNDSCALEPSMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTDQ2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PDMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VRNDSCALEPS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + 
if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTDQ2PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) - // result: (VREDUCEPSMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTDQ2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PDMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VREDUCEPS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPERMI2PS512 x y z) mask) - // result: (VPERMI2PSMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2PS512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PSMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTDQ2PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) - // result: (VPERMI2DMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PS512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTDQ2PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PS512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPERMI2D512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - 
v.reset(OpAMD64VPERMI2DMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VCVTDQ2PS512 x) mask) - // result: (VCVTDQ2PSMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTDQ2PS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTDQ2PSMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTDQ2PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VCVTUDQ2PS512 x) mask) - // result: (VCVTUDQ2PSMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PSMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTDQ2PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PSMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTUDQ2PS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTUDQ2PSMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VCVTPS2PD512 x) mask) - // result: (VCVTPS2PDMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTPS2PD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTPS2PDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTDQ2PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VCVTDQ2PD512 x) mask) - // result: (VCVTDQ2PDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PSMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTDQ2PSMasked256 l:(VMOVDQUload256 {sym} [off] 
ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTDQ2PD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTDQ2PDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VCVTUDQ2PD512 x) mask) - // result: (VCVTUDQ2PDMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTUDQ2PD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUDQ2PDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTDQ2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) - // result: (VCVTTPS2DQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTDQ2PSMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTDQ2PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTDQ2PSMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTTPS2DQ512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VCVTTPS2QQ512 x) mask) - // result: (VCVTTPS2QQMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTTPS2QQ512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2QQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTDQ2PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: 
(VMOVDQU32Masked512 (VCVTTPS2UDQ512 x) mask) - // result: (VCVTTPS2UDQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPD2PS256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTPD2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPD2PS256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VCVTTPS2UDQ512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2UDQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VCVTTPS2UQQ512 x) mask) - // result: (VCVTTPS2UQQMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTTPS2UQQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2UQQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTPD2PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask) - // result: (VDIVPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPD2PSMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPD2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPD2PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VDIVPS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VDIVPSMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) - // result: (VPMOVSXDQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXDQ512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTPD2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) - // result: (VPMOVZXDQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPD2PSXMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPD2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPD2PSXMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVZXDQ512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask) - // result: (VPLZCNTDMasked512 x mask) - for { - if v_0.Op != OpAMD64VPLZCNTD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTPD2PSXMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VMAXPS512 x y) mask) - // result: (VMAXPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPD2PSYMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPD2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPD2PSYMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VMAXPS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - 
v.reset(OpAMD64VMAXPSMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMAXSD512 x y) mask) - // result: (VPMAXSDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMAXSD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTPD2PSYMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPMAXUD512 x y) mask) - // result: (VPMAXUDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2PD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTPS2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2PD512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMAXUD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VMINPS512 x y) mask) - // result: (VMINPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VMINPS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTPS2PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPMINSD512 x y) mask) - // result: (VPMINSDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2PDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPS2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2PDMasked256load {sym} 
[off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMINSD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMINSDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMINUD512 x y) mask) - // result: (VPMINUDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMINUD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTPS2PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VFMADD213PS512 x y z) mask) - // result: (VFMADD213PSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPS2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2PDMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VFMADD213PS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VFMADD213PSMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VFMADDSUB213PS512 x y z) mask) - // result: (VFMADDSUB213PSMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VFMADDSUB213PS512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PSMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTPS2PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VMULPS512 x y) mask) - // result: (VMULPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PD128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PD128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VMULPS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPSMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMULLD512 x y) mask) - // result: (VPMULLDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMULLD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTQQ2PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VFMSUBADD213PS512 x y z) mask) - // result: (VFMSUBADD213PSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTQQ2PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PD256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VFMSUBADD213PS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMSUBADD213PSMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPOPCNTD512 x) mask) - // result: (VPOPCNTDMasked512 x mask) - for { - if v_0.Op != OpAMD64VPOPCNTD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr 
:= l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTQQ2PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPORD512 x y) mask) - // result: (VPORDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTQQ2PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PD512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPORD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPORDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPERMPS512 x y) mask) - // result: (VPERMPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPERMPS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTQQ2PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPERMD512 x y) mask) - // result: (VPERMDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTQQ2PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PDMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPERMD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] mask := v_1 - v.reset(OpAMD64VPERMDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VRCP14PS512 x) mask) - // result: (VRCP14PSMasked512 x mask) - for { - if v_0.Op != OpAMD64VRCP14PS512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTQQ2PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VRSQRT14PS512 x) mask) - // result: (VRSQRT14PSMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTQQ2PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PDMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VRSQRT14PS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPROLD512 [a] x) mask) - // result: (VPROLDMasked512 [a] x mask) - for { - if v_0.Op != OpAMD64VPROLD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTQQ2PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPRORD512 [a] x) mask) - // result: (VPRORDMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTQQ2PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) 
mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PDMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPRORD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPROLVD512 x y) mask) - // result: (VPROLVDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPROLVD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTQQ2PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPRORVD512 x y) mask) - // result: (VPRORVDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PS256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTQQ2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PS256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPRORVD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) - // result: (VPMOVSDBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSDB128_512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128_512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTQQ2PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) 
return true } - // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) - // result: (VPACKSSDWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PSMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTQQ2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPACKSSDW512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) - // result: (VPMOVUSDBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSDB128_512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128_512) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTQQ2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) - // result: (VPACKUSDWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PSX128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PSX128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPACKUSDW512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VSCALEFPS512 x y) mask) - // result: (VSCALEFPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VSCALEFPS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + 
ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTQQ2PSX128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHLDD512 [a] x y) mask) - // result: (VPSHLDDMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PSXMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTQQ2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PSXMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSHLDD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSLLD512 x y) mask) - // result: (VPSLLDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSLLD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTQQ2PSXMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHRDD512 [a] x y) mask) - // result: (VPSHRDDMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PSY128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PSY128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSHRDD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { 
break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSRAD512 x y) mask) - // result: (VPSRADMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRAD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTQQ2PSY128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSRLD512 x y) mask) - // result: (VPSRLDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTQQ2PSYMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTQQ2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTQQ2PSYMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPSRLDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSHLDVD512 x y z) mask) - // result: (VPSHLDVDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPSHLDVD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTQQ2PSYMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPSLLVD512 x y) mask) - // result: (VPSLLVDMasked512 
x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2DQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2DQ256 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2DQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSLLVD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSHRDVD512 x y z) mask) - // result: (VPSHRDVDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPSHRDVD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTTPD2DQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSRAVD512 x y) mask) - // result: (VPSRAVDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2DQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2DQMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2DQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAVD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPSRAVDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSRLVD512 x y) mask) - // result: (VPSRLVDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRLVD512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2DQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VSQRTPS512 x) mask) - // result: (VSQRTPSMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2DQXMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2DQXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2DQXMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VSQRTPS512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VSQRTPSMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VSUBPS512 x y) mask) - // result: (VSUBPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VSUBPS512 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2DQXMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPSUBD512 x y) mask) - // result: (VPSUBDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2DQYMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2DQYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2DQYMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSUBD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBDMasked512) - v.AddArg3(x, y, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPD2DQYMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) - // result: (VPMOVDBMasked128_512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2QQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2QQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2QQ128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMOVDB128_512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPXORD512 x y) mask) - // result: (VPXORDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPXORD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPXORDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2QQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) - // result: (VPSHUFDMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2QQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2QQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2QQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSHUFD512 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - 
} - // match: (VMOVDQU32Masked512 (VPSLLD512const [a] x) mask) - // result: (VPSLLDMasked512const [a] x mask) - for { - if v_0.Op != OpAMD64VPSLLD512const { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2QQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) - // result: (VPSRLDMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2QQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2QQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2QQ512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSRLD512const { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) - // result: (VPSRADMasked512const [a] x mask) - for { - if v_0.Op != OpAMD64VPSRAD512const { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2QQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VCVTTPD2QQMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := 
v.Args[0] - // match: (VMOVDQU64Masked128 (VPABSQ128 x) mask) - // result: (VPABSQMasked128 x mask) + // match: (VCVTTPD2QQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2QQMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPABSQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPABSQMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VADDPD128 x y) mask) - // result: (VADDPDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VADDPD128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2QQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPADDQ128 x y) mask) - // result: (VPADDQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2QQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2QQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2QQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPADDQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPADDQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) - // result: (VPBROADCASTQMasked128 x mask) - for { - if v_0.Op != OpAMD64VPBROADCASTQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPBROADCASTQMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2QQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VRNDSCALEPD128 [a] x) mask) - // result: (VRNDSCALEPDMasked128 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2QQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2QQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2QQMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VRNDSCALEPD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) - // result: (VREDUCEPDMasked128 [a] x mask) - for { - if v_0.Op != OpAMD64VREDUCEPD128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2QQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPERMI2PD128 x y z) mask) - // result: (VPERMI2PDMasked128 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UDQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2UDQ256 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UDQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPERMI2PD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := 
v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PDMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPERMI2Q128 x y z) mask) - // result: (VPERMI2QMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2Q128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2QMasked128) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTTPD2UDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTPD2PSX128 x) mask) - // result: (VCVTPD2PSXMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UDQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2UDQMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UDQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTPD2PSX128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTPD2PSXMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VCVTPD2PSY128 x) mask) - // result: (VCVTPD2PSYMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTPD2PSY128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTPD2PSYMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTQQ2PSX128 x) mask) - // result: (VCVTQQ2PSXMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UDQX128(v *Value) bool { + v_0 := 
v.Args[0] + // match: (VCVTTPD2UDQX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UDQX128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VCVTQQ2PSX128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTQQ2PSXMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VCVTQQ2PSY128 x) mask) - // result: (VCVTQQ2PSYMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTQQ2PSY128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTQQ2PSYMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UDQX128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTUQQ2PSX128 x) mask) - // result: (VCVTUQQ2PSXMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UDQXMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2UDQXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UDQXMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTUQQ2PSX128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTUQQ2PSXMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VCVTUQQ2PSY128 x) mask) - // result: (VCVTUQQ2PSYMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTUQQ2PSY128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUQQ2PSYMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UDQXMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = 
symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTQQ2PD128 x) mask) - // result: (VCVTQQ2PDMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UDQY128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2UDQY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UDQY128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VCVTQQ2PD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTQQ2PDMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VCVTUQQ2PD128 x) mask) - // result: (VCVTUQQ2PDMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTUQQ2PD128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUQQ2PDMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UDQY128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTTPD2DQX128 x) mask) - // result: (VCVTTPD2DQXMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UDQYMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2UDQYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UDQYMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTTPD2DQX128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTTPD2DQXMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VCVTTPD2DQY128 x) mask) - // result: (VCVTTPD2DQYMasked128 x mask) - for { - if v_0.Op != 
OpAMD64VCVTTPD2DQY128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2DQYMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UDQYMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTTPD2QQ128 x) mask) - // result: (VCVTTPD2QQMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UQQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2UQQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UQQ128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VCVTTPD2QQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2QQMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VCVTTPD2UDQX128 x) mask) - // result: (VCVTTPD2UDQXMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTTPD2UDQX128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2UDQXMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UQQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VCVTTPD2UDQY128 x) mask) - // result: (VCVTTPD2UDQYMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UQQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2UQQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UQQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VCVTTPD2UDQY128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2UDQYMasked128) - v.AddArg2(x, mask) - return true - } - // match: 
(VMOVDQU64Masked128 (VCVTTPD2UQQ128 x) mask) - // result: (VCVTTPD2UQQMasked128 x mask) - for { - if v_0.Op != OpAMD64VCVTTPD2UQQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2UQQMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UQQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VDIVPD128 x y) mask) - // result: (VDIVPDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UQQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPD2UQQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UQQ512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VDIVPD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VDIVPDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPLZCNTQ128 x) mask) - // result: (VPLZCNTQMasked128 x mask) - for { - if v_0.Op != OpAMD64VPLZCNTQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTQMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPD2UQQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VMAXPD128 x y) mask) - // result: (VMAXPDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UQQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2UQQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UQQMasked128load {sym} [off] ptr mask mem) 
for { - if v_0.Op != OpAMD64VMAXPD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VMAXPDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMAXSQ128 x y) mask) - // result: (VPMAXSQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMAXSQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2UQQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPMAXUQ128 x y) mask) - // result: (VPMAXUQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UQQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2UQQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UQQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMAXUQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXUQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VMINPD128 x y) mask) - // result: (VMINPDMasked128 x y mask) - for { - if v_0.Op != OpAMD64VMINPD128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPDMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2UQQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPMINSQ128 x y) mask) - // result: 
(VPMINSQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPD2UQQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPD2UQQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPD2UQQMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMINSQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMINSQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMINUQ128 x y) mask) - // result: (VPMINUQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMINUQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPD2UQQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VFMADD213PD128 x y z) mask) - // result: (VFMADD213PDMasked128 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQ512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VFMADD213PD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADD213PDMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked128 (VFMADDSUB213PD128 x y z) mask) - // result: (VFMADDSUB213PDMasked128 x y z mask) - for { - if v_0.Op != OpAMD64VFMADDSUB213PD128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if 
!(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PDMasked128) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTTPS2DQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VMULPD128 x y) mask) - // result: (VMULPDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VMULPD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VMULPDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMULLQ128 x y) mask) - // result: (VPMULLQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPMULLQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2DQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VFMSUBADD213PD128 x y z) mask) - // result: (VFMSUBADD213PDMasked128 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VFMSUBADD213PD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { 
break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VFMSUBADD213PDMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPOPCNTQ128 x) mask) - // result: (VPOPCNTQMasked128 x mask) - for { - if v_0.Op != OpAMD64VPOPCNTQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2DQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VRCP14PD128 x) mask) - // result: (VRCP14PDMasked128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2DQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VRCP14PD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VRCP14PDMasked128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VRSQRT14PD128 x) mask) - // result: (VRSQRT14PDMasked128 x mask) - for { - if v_0.Op != OpAMD64VRSQRT14PD128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2DQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPROLQ128 [a] x) mask) - // result: (VPROLQMasked128 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2QQ256(v *Value) bool { + v_0 := 
v.Args[0] + // match: (VCVTTPS2QQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2QQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPROLQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPRORQ128 [a] x) mask) - // result: (VPRORQMasked128 [a] x mask) - for { - if v_0.Op != OpAMD64VPRORQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2QQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VPROLVQ128 x y) mask) - // result: (VPROLVQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2QQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2QQ512 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2QQ512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPROLVQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPRORVQ128 x y) mask) - // result: (VPRORVQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPRORVQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVQMasked128) - v.AddArg3(x, y, mask) + 
v.reset(OpAMD64VCVTTPS2QQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) - // result: (VPMOVSQBMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2QQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2QQMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2QQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSQB128_128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) - // result: (VPMOVSQWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQW128_128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2QQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) - // result: (VPMOVSQDMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2QQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2QQMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2QQMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSQD128_128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked128_128) - v.AddArg2(x, mask) - return true - 
} - // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) - // result: (VPMOVUSQBMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQB128_128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2QQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) - // result: (VPMOVUSQWMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UDQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UDQ128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMOVUSQW128_128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) - // result: (VPMOVUSQDMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQD128_128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2UDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VSCALEFPD128 x y) mask) - // result: (VSCALEFPDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UDQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2UDQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UDQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VSCALEFPD128 { + l := v_0 + if l.Op != 
OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSHLDQ128 [a] x y) mask) - // result: (VPSHLDQMasked128 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHLDQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2UDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VPSLLQ128 x y) mask) - // result: (VPSLLQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UDQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2UDQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UDQ512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSLLQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSHRDQ128 [a] x y) mask) - // result: (VPSHRDQMasked128 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHRDQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2UDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VPSRAQ128 x y) mask) - // 
result: (VPSRAQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UDQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2UDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UDQMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPSRAQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSRLQ128 x y) mask) - // result: (VPSRLQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPSRLQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2UDQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPSHLDVQ128 x y z) mask) - // result: (VPSHLDVQMasked128 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UDQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2UDQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UDQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSHLDVQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPSHLDVQMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSLLVQ128 x y) mask) - // result: (VPSLLVQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPSLLVQ128 
{ + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2UDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPSHRDVQ128 x y z) mask) - // result: (VPSHRDVQMasked128 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UDQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2UDQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UDQMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPSHRDVQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPSHRDVQMasked128) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSRAVQ128 x y) mask) - // result: (VPSRAVQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPSRAVQ128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2UDQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPSRLVQ128 x y) mask) - // result: (VPSRLVQMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UQQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2UQQ256 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UQQ256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSRLVQ128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVQMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VSQRTPD128 x) mask) - // result: (VSQRTPDMasked128 x mask) - for { - if v_0.Op != OpAMD64VSQRTPD128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2UQQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VSUBPD128 x y) mask) - // result: (VSUBPDMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UQQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2UQQ512 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UQQ512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VSUBPD128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPDMasked128) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSUBQ128 x y) mask) - // result: (VPSUBQMasked128 x y mask) - for { - if v_0.Op != OpAMD64VPSUBQ128 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBQMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTTPS2UQQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) - // result: (VPMOVQBMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UQQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2UQQMasked256 l:(VMOVDQUload128 {sym} [off] 
ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UQQMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVQB128_128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) - // result: (VPMOVQWMasked128_128 x mask) - for { - if v_0.Op != OpAMD64VPMOVQW128_128 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128_128) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2UQQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) - // result: (VPMOVQDMasked128_128 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2UQQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2UQQMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2UQQMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVQD128_128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQDMasked128_128) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) - // result: (VPSLLQMasked128const [a] x mask) - for { - if v_0.Op != OpAMD64VPSLLQ128const { + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTTPS2UQQMasked512load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) - // result: (VPSRLQMasked128const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUDQ2PD256 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PD256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSRLQ128const { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTUDQ2PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) - // result: (VPSRAQMasked128const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUDQ2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PD512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPSRAQ128const { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked128const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTUDQ2PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VCVTUDQ2PDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU64Masked256 (VPABSQ256 x) mask) - // result: (VPABSQMasked256 x mask) + // match: (VCVTUDQ2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PDMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPABSQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPABSQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VADDPD256 x y) mask) - // result: (VADDPDMasked256 x y mask) - for { - if v_0.Op != OpAMD64VADDPD256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPDMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTUDQ2PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPADDQ256 x y) mask) - // result: (VPADDQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUDQ2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PDMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPADDQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPADDQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) - // result: (VBROADCASTSDMasked256 x mask) - for { - if v_0.Op != OpAMD64VBROADCASTSD256 { + if !(canMergeLoad(v, 
l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUDQ2PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) - // result: (VPBROADCASTQMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PS128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUDQ2PS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PS128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPBROADCASTQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VRNDSCALEPD256 [a] x) mask) - // result: (VRNDSCALEPDMasked256 [a] x mask) - for { - if v_0.Op != OpAMD64VRNDSCALEPD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUDQ2PS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) - // result: (VREDUCEPDMasked256 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PS256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUDQ2PS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PS256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VREDUCEPD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - 
v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPERMI2PD256 x y z) mask) - // result: (VPERMI2PDMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2PD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PDMasked256) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTUDQ2PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VPERMI2Q256 x y z) mask) - // result: (VPERMI2QMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PS512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUDQ2PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PS512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPERMI2Q256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2QMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked256 (VCVTPD2PS256 x) mask) - // result: (VCVTPD2PSMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTPD2PS256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTPD2PSMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUDQ2PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VCVTQQ2PS256 x) mask) - // result: (VCVTQQ2PSMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PSMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(VCVTUDQ2PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PSMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTQQ2PS256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTQQ2PSMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VCVTUQQ2PS256 x) mask) - // result: (VCVTUQQ2PSMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTUQQ2PS256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUQQ2PSMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUDQ2PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VCVTQQ2PD256 x) mask) - // result: (VCVTQQ2PDMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PSMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUDQ2PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTQQ2PD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTQQ2PDMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VCVTUQQ2PD256 x) mask) - // result: (VCVTUQQ2PDMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTUQQ2PD256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUQQ2PDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUDQ2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VCVTTPD2DQ256 x) mask) - // result: (VCVTTPD2DQMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUDQ2PSMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUDQ2PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUDQ2PSMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VCVTTPD2DQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCVTTPD2DQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VCVTTPD2QQ256 x) mask) - // result: (VCVTTPD2QQMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTTPD2QQ256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2QQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUDQ2PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VCVTTPD2UDQ256 x) mask) - // result: (VCVTTPD2UDQMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PD128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PD128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VCVTTPD2UDQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2UDQMasked256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VCVTTPD2UQQ256 x) mask) - // result: (VCVTTPD2UQQMasked256 x mask) - for { - if v_0.Op != OpAMD64VCVTTPD2UQQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + 
if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2UQQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUQQ2PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VDIVPD256 x y) mask) - // result: (VDIVPDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUQQ2PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PD256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VDIVPD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VDIVPDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPLZCNTQ256 x) mask) - // result: (VPLZCNTQMasked256 x mask) - for { - if v_0.Op != OpAMD64VPLZCNTQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUQQ2PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VMAXPD256 x y) mask) - // result: (VMAXPDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUQQ2PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PD512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VMAXPD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMAXPDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMAXSQ256 x y) mask) - // 
result: (VPMAXSQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPMAXSQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTUQQ2PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VPMAXUQ256 x y) mask) - // result: (VPMAXUQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUQQ2PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PDMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMAXUQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXUQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VMINPD256 x y) mask) - // result: (VMINPDMasked256 x y mask) - for { - if v_0.Op != OpAMD64VMINPD256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPDMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTUQQ2PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPMINSQ256 x y) mask) - // result: (VPMINSQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUQQ2PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: 
(VCVTUQQ2PDMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMINSQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPMINSQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMINUQ256 x y) mask) - // result: (VPMINUQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPMINUQ256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTUQQ2PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VFMADD213PD256 x y z) mask) - // result: (VFMADD213PDMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUQQ2PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PDMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VFMADD213PD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VFMADD213PDMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked256 (VFMADDSUB213PD256 x y z) mask) - // result: (VFMADDSUB213PDMasked256 x y z mask) - for { - if v_0.Op != OpAMD64VFMADDSUB213PD256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PDMasked256) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VCVTUQQ2PDMasked512load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VMULPD256 x y) mask) - // result: (VMULPDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PS256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUQQ2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PS256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VMULPD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMULLQ256 x y) mask) - // result: (VPMULLQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPMULLQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTUQQ2PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VFMSUBADD213PD256 x y z) mask) - // result: (VFMSUBADD213PDMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PSMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUQQ2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VFMSUBADD213PD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VFMSUBADD213PDMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: 
(VMOVDQU64Masked256 (VPOPCNTQ256 x) mask) - // result: (VPOPCNTQMasked256 x mask) - for { - if v_0.Op != OpAMD64VPOPCNTQ256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUQQ2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPERMPD256 x y) mask) - // result: (VPERMPDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PSX128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PSX128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPERMPD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMPDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPERMQ256 x y) mask) - // result: (VPERMQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPERMQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VCVTUQQ2PSX128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VRCP14PD256 x) mask) - // result: (VRCP14PDMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PSXMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUQQ2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PSXMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VRCP14PD256 { + l := v_0 + if l.Op != 
OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VRCP14PDMasked256) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTUQQ2PSXMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VRSQRT14PD256 x) mask) - // result: (VRSQRT14PDMasked256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PSY128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTUQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PSY128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VRSQRT14PD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked256) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTUQQ2PSY128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VPROLQ256 [a] x) mask) - // result: (VPROLQMasked256 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VCVTUQQ2PSYMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTUQQ2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTUQQ2PSYMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPROLQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) 
- return true - } - // match: (VMOVDQU64Masked256 (VPRORQ256 [a] x) mask) - // result: (VPRORQMasked256 [a] x mask) - for { - if v_0.Op != OpAMD64VPRORQ256 { + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VCVTUQQ2PSYMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPROLVQ256 x y) mask) - // result: (VPROLVQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPROLVQ256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPRORVQ256 x y) mask) - // result: (VPRORVQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPRORVQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VDIVPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) - // result: (VPMOVSQBMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // 
result: (VDIVPDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSQB128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) - // result: (VPMOVSQWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQW128_256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128_256) - v.AddArg2(x, mask) + v.reset(OpAMD64VDIVPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) - // result: (VPMOVSQDMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSQD128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) - // result: (VPMOVSQDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VDIVPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true 
} - // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) - // result: (VPMOVUSQBMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVUSQB128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) - // result: (VPMOVUSQWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQW128_256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128_256) - v.AddArg2(x, mask) + v.reset(OpAMD64VDIVPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) - // result: (VPMOVUSQDMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPS512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMOVUSQD128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) - // result: (VPMOVUSQDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQD256 { + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VDIVPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VSCALEFPD256 x y) mask) - // result: (VSCALEFPDMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPSMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VSCALEFPD256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSHLDQ256 [a] x y) mask) - // result: (VPSHLDQMasked256 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHLDQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VDIVPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPSLLQ256 x y) mask) - // result: (VPSLLQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPSMasked256load {sym} 
[off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSLLQ256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSHRDQ256 [a] x y) mask) - // result: (VPSHRDQMasked256 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHRDQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VDIVPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPSRAQ256 x y) mask) - // result: (VPSRAQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VDIVPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPSMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAQ256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSRLQ256 x y) mask) - // result: (VPSRLQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSRLQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VDIVPSMasked512load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPSHLDVQ256 x y z) mask) - // result: (VPSHLDVQMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PD512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPSHLDVQ256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVQMasked256) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSLLVQ256 x y) mask) - // result: (VPSLLVQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSLLVQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMADD213PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VPSHRDVQ256 x y z) mask) - // result: (VPSHRDVQMasked256 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSHRDVQ256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVQMasked256) - 
v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSRAVQ256 x y) mask) - // result: (VPSRAVQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSRAVQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMADD213PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPSRLVQ256 x y) mask) - // result: (VPSRLVQMasked256 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLVQ256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVQMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VSQRTPD256 x) mask) - // result: (VSQRTPDMasked256 x mask) - for { - if v_0.Op != OpAMD64VSQRTPD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADD213PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VSUBPD256 x y) mask) - // result: (VSUBPDMasked256 x y mask) + return false +} +func 
rewriteValueAMD64_OpAMD64VFMADD213PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VSUBPD256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPDMasked256) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSUBQ256 x y) mask) - // result: (VPSUBQMasked256 x y mask) - for { - if v_0.Op != OpAMD64VPSUBQ256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBQMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMADD213PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) - // result: (VPMOVQBMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PS512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPMOVQB128_256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) - // result: (VPMOVQWMasked128_256 x mask) - for { - if v_0.Op != OpAMD64VPMOVQW128_256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128_256) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADD213PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) - // result: (VPMOVQDMasked128_256 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVQD128_256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQDMasked128_256) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPMOVQD256 x) mask) - // result: (VPMOVQDMasked256 x mask) - for { - if v_0.Op != OpAMD64VPMOVQD256 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQDMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADD213PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) - // result: (VPSLLQMasked256const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem) 
for { - if v_0.Op != OpAMD64VPSLLQ256const { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) - // result: (VPSRLQMasked256const [a] x mask) - for { - if v_0.Op != OpAMD64VPSRLQ256const { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADD213PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) - // result: (VPSRAQMasked256const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PSMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAQ256const { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked256const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func 
rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VFMADDSUB213PD512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU64Masked512 (VPABSQ512 x) mask) - // result: (VPABSQMasked512 x mask) + // match: (VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PD512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPABSQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VADDPD512 x y) mask) - // result: (VADDPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VADDPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMADDSUB213PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPADDQ512 x y) mask) - // result: (VPADDQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PDMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPADDQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPANDQ512 x y) mask) - // result: (VPANDQMasked512 x 
y mask) - for { - if v_0.Op != OpAMD64VPANDQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMADDSUB213PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPANDNQ512 x y) mask) - // result: (VPANDNQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PDMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPANDNQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDNQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) - // result: (VBROADCASTSDMasked512 x mask) - for { - if v_0.Op != OpAMD64VBROADCASTSD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADDSUB213PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) - // result: (VPBROADCASTQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := 
v.Args[0] + // match: (VFMADDSUB213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PDMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPBROADCASTQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) - // result: (VRNDSCALEPDMasked512 [a] x mask) - for { - if v_0.Op != OpAMD64VRNDSCALEPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADDSUB213PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) - // result: (VREDUCEPDMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PS512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VREDUCEPD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) - // result: (VPERMI2PDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + 
mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VFMADDSUB213PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) - // result: (VPERMI2QMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPERMI2Q512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2QMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked512 (VCVTQQ2PD512 x) mask) - // result: (VCVTQQ2PDMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTQQ2PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTQQ2PDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADDSUB213PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VCVTUQQ2PD512 x) mask) - // result: (VCVTUQQ2PDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // 
cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VCVTUQQ2PD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTUQQ2PDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VCVTTPD2QQ512 x) mask) - // result: (VCVTTPD2QQMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTTPD2QQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2QQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VFMADDSUB213PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VCVTTPD2UQQ512 x) mask) - // result: (VCVTTPD2UQQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PSMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VCVTTPD2UQQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPD2UQQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) - // result: (VDIVPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VDIVPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VDIVPDMasked512) - v.AddArg3(x, y, mask) 
+ v.reset(OpAMD64VFMADDSUB213PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPLZCNTQ512 x) mask) - // result: (VPLZCNTQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PD512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPLZCNTQ512 { - break + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VMAXPD512 x y) mask) - // result: (VMAXPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VMAXPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMAXPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMSUBADD213PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPMAXSQ512 x y) mask) - // result: (VPMAXSQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PDMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPMAXSQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPMAXSQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMAXUQ512 x y) mask) - // result: (VPMAXUQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMAXUQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMSUBADD213PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VMINPD512 x y) mask) - // result: (VMINPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PDMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VMINPD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMINSQ512 x y) mask) - // result: (VPMINSQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMINSQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMSUBADD213PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPMINUQ512 x y) mask) - // result: (VPMINUQMasked512 x y 
mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PDMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPMINUQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VFMADD213PD512 x y z) mask) - // result: (VFMADD213PDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VFMADD213PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADD213PDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VFMSUBADD213PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VFMADDSUB213PD512 x y z) mask) - // result: (VFMADDSUB213PDMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PS512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VFMADDSUB213PD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PDMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked512 (VMULPD512 x y) mask) - 
// result: (VMULPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VMULPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMSUBADD213PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPMULLQ512 x y) mask) - // result: (VPMULLQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPMULLQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VFMSUBADD213PD512 x y z) mask) - // result: (VFMSUBADD213PDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VFMSUBADD213PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMSUBADD213PDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VFMSUBADD213PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPOPCNTQ512 x) mask) - // result: (VPOPCNTQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked256(v *Value) bool { + v_3 := 
v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPOPCNTQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPORQ512 x y) mask) - // result: (VPORQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPORQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPORQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMSUBADD213PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPERMPD512 x y) mask) - // result: (VPERMPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PSMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPERMPD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMPDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPERMQ512 x y) mask) - // result: (VPERMQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPERMQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + 
mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VFMSUBADD213PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VRCP14PD512 x) mask) - // result: (VRCP14PDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VRCP14PD512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VRSQRT14PD512 x) mask) - // result: (VRSQRT14PDMasked512 x mask) - for { - if v_0.Op != OpAMD64VRSQRT14PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VGF2P8AFFINEINVQB128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPROLQ512 [a] x) mask) - // result: (VPROLQMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr 
mem) for { - if v_0.Op != OpAMD64VPROLQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPRORQ512 [a] x) mask) - // result: (VPRORQMasked512 [a] x mask) - for { - if v_0.Op != OpAMD64VPRORQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VGF2P8AFFINEINVQB256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPROLVQ512 x y) mask) - // result: (VPROLVQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VPROLVQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPRORVQ512 x y) mask) - // result: (VPRORVQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPRORVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPRORVQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VGF2P8AFFINEINVQB512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) - // result: (VPMOVSQBMasked128_512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSQB128_512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) - // result: (VPMOVSQWMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSQW128_512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128_512) - v.AddArg2(x, mask) + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) - // result: (VPMOVUSQBMasked128_512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: 
(VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVUSQB128_512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) - // result: (VPMOVUSQWMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVUSQW128_512 { - break - } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VSCALEFPD512 x y) mask) - // result: (VSCALEFPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VSCALEFPD512 { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSHLDQ512 [a] x y) mask) - // result: (VPSHLDQMasked512 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHLDQ512 { - break - } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSLLQ512 x y) mask) - // result: (VPSLLQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSLLQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSHRDQ512 [a] x y) mask) - // result: (VPSHRDQMasked512 [a] x y mask) + return false +} +func 
rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSHRDQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSRAQ512 x y) mask) - // result: (VPSRAQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRAQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSRLQ512 x y) mask) - // result: (VPSRLQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VPSRLQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: 
(VMOVDQU64Masked512 (VPSHLDVQ512 x y z) mask) - // result: (VPSHLDVQMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPSHLDVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVQMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VGF2P8AFFINEQB128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPSLLVQ512 x y) mask) - // result: (VPSLLVQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VPSLLVQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSHRDVQ512 x y z) mask) - // result: (VPSHRDVQMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPSHRDVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVQMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VGF2P8AFFINEQB256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPSRAVQ512 x y) mask) - // result: (VPSRAVQMasked512 x y mask) + return false +} 
+func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - if v_0.Op != OpAMD64VPSRAVQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSRLVQ512 x y) mask) - // result: (VPSRLVQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRLVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VGF2P8AFFINEQB512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VSQRTPD512 x) mask) - // result: (VSQRTPDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VSQRTPD512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VSUBPD512 x y) mask) - // result: (VSUBPDMasked512 x y mask) - for { - if v_0.Op != 
OpAMD64VSUBPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VGF2P8AFFINEQBMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSUBQ512 x y) mask) - // result: (VPSUBQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSUBQ512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) - // result: (VPMOVQBMasked128_512 x mask) - for { - if v_0.Op != OpAMD64VPMOVQB128_512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128_512) - v.AddArg2(x, mask) + v.reset(OpAMD64VGF2P8AFFINEQBMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) - // result: (VPMOVQWMasked128_512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool { + v_2 := 
v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVQW128_512 { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128_512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPXORQ512 x y) mask) - // result: (VPXORQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPXORQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPXORQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VGF2P8AFFINEQBMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) - // result: (VPSLLQMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VMAXPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSLLQ512const { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - a := 
auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true + break } - // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) - // result: (VPSRLQMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VMAXPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLQ512const { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true + break } - // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) - // result: (VPSRAQMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VMAXPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAQ512const { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := 
v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true + break } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU8Masked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VMAXPDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU8Masked128 (VPABSB128 x) mask) - // result: (VPABSBMasked128 x mask) + // match: (VMAXPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPABSB128 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSBMasked128) - v.AddArg2(x, mask) - return true + break } - // match: (VMOVDQU8Masked128 (VPADDB128 x y) mask) - // result: (VPADDBMasked128 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VMAXPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPS512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPADDB128 { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + 
off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPSMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPSMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := 
v.Args[0] + // match: (VMAXPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPSMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + 
} + v.reset(OpAMD64VMINPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPS512load {sym} [off] x ptr 
mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPSMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPSMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true 
+ } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPSMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMOVD(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VMOVD x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (VMOVDload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != OpAMD64MOVLload { + break + } + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64VMOVDload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU16Masked128 (VPABSW128 x) mask) + // result: (VPABSWMasked128 x mask) + for { + if v_0.Op != OpAMD64VPABSW128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSWMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked128 (VPADDW128 x y) mask) + // result: (VPADDWMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPADDW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := 
v_1 - v.reset(OpAMD64VPADDBMasked128) + v.reset(OpAMD64VPADDWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPADDSB128 x y) mask) - // result: (VPADDSBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPADDSW128 x y) mask) + // result: (VPADDSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPADDSB128 { + if v_0.Op != OpAMD64VPADDSW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPADDSBMasked128) + v.reset(OpAMD64VPADDSWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPADDUSB128 x y) mask) - // result: (VPADDUSBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPADDUSW128 x y) mask) + // result: (VPADDUSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPADDUSB128 { + if v_0.Op != OpAMD64VPADDUSW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPADDUSBMasked128) + v.reset(OpAMD64VPADDUSWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPAVGB128 x y) mask) - // result: (VPAVGBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPAVGW128 x y) mask) + // result: (VPAVGWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPAVGB128 { + if v_0.Op != OpAMD64VPAVGW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPAVGBMasked128) + v.reset(OpAMD64VPAVGWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) - // result: (VPBROADCASTBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) + // result: (VPBROADCASTWMasked128 x mask) for { - if v_0.Op != OpAMD64VPBROADCASTB128 { + if v_0.Op != OpAMD64VPBROADCASTW128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked128) + v.reset(OpAMD64VPBROADCASTWMasked128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPERMI2B128 x y z) mask) - // result: (VPERMI2BMasked128 x y z mask) + // match: (VMOVDQU16Masked128 (VPERMI2W128 x y z) mask) + // result: 
(VPERMI2WMasked128 x y z mask) for { - if v_0.Op != OpAMD64VPERMI2B128 { + if v_0.Op != OpAMD64VPERMI2W128 { break } z := v_0.Args[2] x := v_0.Args[0] y := v_0.Args[1] mask := v_1 - v.reset(OpAMD64VPERMI2BMasked128) + v.reset(OpAMD64VPERMI2WMasked128) v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU8Masked128 (VPALIGNR128 [a] x y) mask) - // result: (VPALIGNRMasked128 [a] x y mask) + // match: (VMOVDQU16Masked128 (VPMADDWD128 x y) mask) + // result: (VPMADDWDMasked128 x y mask) for { - if v_0.Op != OpAMD64VPALIGNR128 { + if v_0.Op != OpAMD64VPMADDWD128 { break } - a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPALIGNRMasked128) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMADDWDMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) - // result: (VPMOVSXBQMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMADDUBSW128 x y) mask) + // result: (VPMADDUBSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPMOVSXBQ128 { + if v_0.Op != OpAMD64VPMADDUBSW128 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VPMADDUBSWMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVZXBQ128 x) mask) - // result: (VPMOVZXBQMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVSXWQ128 x) mask) + // result: (VPMOVSXWQMasked128 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBQ128 { + if v_0.Op != OpAMD64VPMOVSXWQ128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked128) + v.reset(OpAMD64VPMOVSXWQMasked128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) - // result: (VPMOVSXBDMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVZXWQ128 x) mask) + // result: (VPMOVZXWQMasked128 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBD128 { + if v_0.Op != OpAMD64VPMOVZXWQ128 { break } x := v_0.Args[0] mask := v_1 - 
v.reset(OpAMD64VPMOVSXBDMasked128) + v.reset(OpAMD64VPMOVZXWQMasked128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) - // result: (VPMOVZXBDMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) + // result: (VPMOVSXWDMasked128 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBD128 { + if v_0.Op != OpAMD64VPMOVSXWD128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBDMasked128) + v.reset(OpAMD64VPMOVSXWDMasked128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVSXBW128 x) mask) - // result: (VPMOVSXBWMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) + // result: (VPMOVZXWDMasked128 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBW128 { + if v_0.Op != OpAMD64VPMOVZXWD128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBWMasked128) + v.reset(OpAMD64VPMOVZXWDMasked128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPMOVZXBW128 x) mask) - // result: (VPMOVZXBWMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMAXSW128 x y) mask) + // result: (VPMAXSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPMOVZXBW128 { + if v_0.Op != OpAMD64VPMAXSW128 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBWMasked128) - v.AddArg2(x, mask) + v.reset(OpAMD64VPMAXSWMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VGF2P8AFFINEINVQB128 [a] x y) mask) - // result: (VGF2P8AFFINEINVQBMasked128 [a] x y mask) + // match: (VMOVDQU16Masked128 (VPMAXUW128 x y) mask) + // result: (VPMAXUWMasked128 x y mask) for { - if v_0.Op != OpAMD64VGF2P8AFFINEINVQB128 { + if v_0.Op != OpAMD64VPMAXUW128 { break } - a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMAXUWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VGF2P8AFFINEQB128 [a] x y) mask) - // 
result: (VGF2P8AFFINEQBMasked128 [a] x y mask) + // match: (VMOVDQU16Masked128 (VPMINSW128 x y) mask) + // result: (VPMINSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VGF2P8AFFINEQB128 { + if v_0.Op != OpAMD64VPMINSW128 { break } - a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEQBMasked128) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMINSWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VGF2P8MULB128 x y) mask) - // result: (VGF2P8MULBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPMINUW128 x y) mask) + // result: (VPMINUWMasked128 x y mask) for { - if v_0.Op != OpAMD64VGF2P8MULB128 { + if v_0.Op != OpAMD64VPMINUW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8MULBMasked128) + v.reset(OpAMD64VPMINUWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMAXSB128 x y) mask) - // result: (VPMAXSBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPMULHW128 x y) mask) + // result: (VPMULHWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPMAXSB128 { + if v_0.Op != OpAMD64VPMULHW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXSBMasked128) + v.reset(OpAMD64VPMULHWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMAXUB128 x y) mask) - // result: (VPMAXUBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPMULHUW128 x y) mask) + // result: (VPMULHUWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPMAXUB128 { + if v_0.Op != OpAMD64VPMULHUW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXUBMasked128) + v.reset(OpAMD64VPMULHUWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMINSB128 x y) mask) - // result: (VPMINSBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPMULLW128 x y) mask) + // result: (VPMULLWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPMINSB128 { + if v_0.Op != 
OpAMD64VPMULLW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMINSBMasked128) + v.reset(OpAMD64VPMULLWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPMINUB128 x y) mask) - // result: (VPMINUBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPOPCNTW128 x) mask) + // result: (VPOPCNTWMasked128 x mask) for { - if v_0.Op != OpAMD64VPMINUB128 { + if v_0.Op != OpAMD64VPOPCNTW128 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTWMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked128 (VPERMW128 x y) mask) + // result: (VPERMWMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPERMW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMINUBMasked128) + v.reset(OpAMD64VPERMWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPOPCNTB128 x) mask) - // result: (VPOPCNTBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) + // result: (VPMOVSWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPOPCNTB128 { + if v_0.Op != OpAMD64VPMOVSWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) + v.reset(OpAMD64VPMOVSWBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPERMB128 x y) mask) - // result: (VPERMBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) + // result: (VPMOVUSWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPERMB128 { + if v_0.Op != OpAMD64VPMOVUSWB128_128 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPERMBMasked128) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPMOVUSWBMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked128 (VPSHUFB128 x y) mask) - // result: (VPSHUFBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPSHLDW128 [a] x y) mask) + // result: (VPSHLDWMasked128 [a] x y mask) for { - if v_0.Op != OpAMD64VPSHUFB128 { + if 
v_0.Op != OpAMD64VPSHLDW128 { break } + a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSHUFBMasked128) + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPSUBB128 x y) mask) - // result: (VPSUBBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPSLLW128 x y) mask) + // result: (VPSLLWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPSUBB128 { + if v_0.Op != OpAMD64VPSLLW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBBMasked128) + v.reset(OpAMD64VPSLLWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPSUBSB128 x y) mask) - // result: (VPSUBSBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPSHRDW128 [a] x y) mask) + // result: (VPSHRDWMasked128 [a] x y mask) for { - if v_0.Op != OpAMD64VPSUBSB128 { + if v_0.Op != OpAMD64VPSHRDW128 { break } + a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBSBMasked128) + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked128 (VPSUBUSB128 x y) mask) - // result: (VPSUBUSBMasked128 x y mask) + // match: (VMOVDQU16Masked128 (VPSRAW128 x y) mask) + // result: (VPSRAWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPSUBUSB128 { + if v_0.Op != OpAMD64VPSRAW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBUSBMasked128) + v.reset(OpAMD64VPSRAWMasked128) v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQU8Masked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQU8Masked256 (VPABSB256 x) mask) - // result: (VPABSBMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSRLW128 x y) mask) + // result: (VPSRLWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPABSB256 { + if v_0.Op != OpAMD64VPSRLW128 { break } + y := 
v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPABSBMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSRLWMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPADDB256 x y) mask) - // result: (VPADDBMasked256 x y mask) + // match: (VMOVDQU16Masked128 (VPSHLDVW128 x y z) mask) + // result: (VPSHLDVWMasked128 x y z mask) for { - if v_0.Op != OpAMD64VPADDB256 { + if v_0.Op != OpAMD64VPSHLDVW128 { break } - y := v_0.Args[1] + z := v_0.Args[2] x := v_0.Args[0] + y := v_0.Args[1] mask := v_1 - v.reset(OpAMD64VPADDBMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHLDVWMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU8Masked256 (VPADDSB256 x y) mask) - // result: (VPADDSBMasked256 x y mask) + // match: (VMOVDQU16Masked128 (VPSLLVW128 x y) mask) + // result: (VPSLLVWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPADDSB256 { + if v_0.Op != OpAMD64VPSLLVW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPADDSBMasked256) + v.reset(OpAMD64VPSLLVWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPADDUSB256 x y) mask) - // result: (VPADDUSBMasked256 x y mask) + // match: (VMOVDQU16Masked128 (VPSHRDVW128 x y z) mask) + // result: (VPSHRDVWMasked128 x y z mask) for { - if v_0.Op != OpAMD64VPADDUSB256 { + if v_0.Op != OpAMD64VPSHRDVW128 { break } - y := v_0.Args[1] + z := v_0.Args[2] x := v_0.Args[0] + y := v_0.Args[1] mask := v_1 - v.reset(OpAMD64VPADDUSBMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHRDVWMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU8Masked256 (VPAVGB256 x y) mask) - // result: (VPAVGBMasked256 x y mask) + // match: (VMOVDQU16Masked128 (VPSRAVW128 x y) mask) + // result: (VPSRAVWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPAVGB256 { + if v_0.Op != OpAMD64VPSRAVW128 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPAVGBMasked256) + v.reset(OpAMD64VPSRAVWMasked128) 
v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) - // result: (VPBROADCASTBMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSRLVW128 x y) mask) + // result: (VPSRLVWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPBROADCASTB256 { + if v_0.Op != OpAMD64VPSRLVW128 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSRLVWMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPERMI2B256 x y z) mask) - // result: (VPERMI2BMasked256 x y z mask) + // match: (VMOVDQU16Masked128 (VPSUBW128 x y) mask) + // result: (VPSUBWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPERMI2B256 { + if v_0.Op != OpAMD64VPSUBW128 { break } - z := v_0.Args[2] - x := v_0.Args[0] y := v_0.Args[1] + x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPERMI2BMasked256) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSUBWMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPALIGNR256 [a] x y) mask) - // result: (VPALIGNRMasked256 [a] x y mask) + // match: (VMOVDQU16Masked128 (VPSUBSW128 x y) mask) + // result: (VPSUBSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPALIGNR256 { + if v_0.Op != OpAMD64VPSUBSW128 { break } - a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPALIGNRMasked256) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSUBSWMasked128) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) - // result: (VPMOVSXBQMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSUBUSW128 x y) mask) + // result: (VPSUBUSWMasked128 x y mask) for { - if v_0.Op != OpAMD64VPMOVSXBQ256 { + if v_0.Op != OpAMD64VPSUBUSW128 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSUBUSWMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 
(VPMOVZXBQ256 x) mask) - // result: (VPMOVZXBQMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) + // result: (VPMOVWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBQ256 { + if v_0.Op != OpAMD64VPMOVWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked256) + v.reset(OpAMD64VPMOVWBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVSXBD256 x) mask) - // result: (VPMOVSXBDMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSHUFHW128 [a] x) mask) + // result: (VPSHUFHWMasked128 [a] x mask) for { - if v_0.Op != OpAMD64VPMOVSXBD256 { + if v_0.Op != OpAMD64VPSHUFHW128 { break } + a := auxIntToUint8(v_0.AuxInt) x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBDMasked256) + v.reset(OpAMD64VPSHUFHWMasked128) + v.AuxInt = uint8ToAuxInt(a) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVZXBD256 x) mask) - // result: (VPMOVZXBDMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSHUFLW128 [a] x) mask) + // result: (VPSHUFLWMasked128 [a] x mask) for { - if v_0.Op != OpAMD64VPMOVZXBD256 { + if v_0.Op != OpAMD64VPSHUFLW128 { break } + a := auxIntToUint8(v_0.AuxInt) x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBDMasked256) + v.reset(OpAMD64VPSHUFLWMasked128) + v.AuxInt = uint8ToAuxInt(a) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) - // result: (VPMOVSXBWMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSLLW128const [a] x) mask) + // result: (VPSLLWMasked128const [a] x mask) for { - if v_0.Op != OpAMD64VPMOVSXBW256 { + if v_0.Op != OpAMD64VPSLLW128const { break } + a := auxIntToUint8(v_0.AuxInt) x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBWMasked256) + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = uint8ToAuxInt(a) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) - // result: (VPMOVZXBWMasked256 x mask) + // match: (VMOVDQU16Masked128 (VPSRLW128const [a] x) 
mask) + // result: (VPSRLWMasked128const [a] x mask) for { - if v_0.Op != OpAMD64VPMOVZXBW256 { + if v_0.Op != OpAMD64VPSRLW128const { break } + a := auxIntToUint8(v_0.AuxInt) x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBWMasked256) + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = uint8ToAuxInt(a) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VGF2P8AFFINEINVQB256 [a] x y) mask) - // result: (VGF2P8AFFINEINVQBMasked256 [a] x y mask) + // match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) + // result: (VPSRAWMasked128const [a] x mask) for { - if v_0.Op != OpAMD64VGF2P8AFFINEINVQB256 { + if v_0.Op != OpAMD64VPSRAW128const { break } a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256) + v.reset(OpAMD64VPSRAWMasked128const) v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VGF2P8AFFINEQB256 [a] x y) mask) - // result: (VGF2P8AFFINEQBMasked256 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU16Masked256 (VPABSW256 x) mask) + // result: (VPABSWMasked256 x mask) for { - if v_0.Op != OpAMD64VGF2P8AFFINEQB256 { + if v_0.Op != OpAMD64VPABSW256 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEQBMasked256) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPABSWMasked256) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VGF2P8MULB256 x y) mask) - // result: (VGF2P8MULBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPADDW256 x y) mask) + // result: (VPADDWMasked256 x y mask) for { - if v_0.Op != OpAMD64VGF2P8MULB256 { + if v_0.Op != OpAMD64VPADDW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8MULBMasked256) + v.reset(OpAMD64VPADDWMasked256) 
v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPMAXSB256 x y) mask) - // result: (VPMAXSBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPADDSW256 x y) mask) + // result: (VPADDSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMAXSB256 { + if v_0.Op != OpAMD64VPADDSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXSBMasked256) + v.reset(OpAMD64VPADDSWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPMAXUB256 x y) mask) - // result: (VPMAXUBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPADDUSW256 x y) mask) + // result: (VPADDUSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMAXUB256 { + if v_0.Op != OpAMD64VPADDUSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXUBMasked256) + v.reset(OpAMD64VPADDUSWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPMINSB256 x y) mask) - // result: (VPMINSBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPAVGW256 x y) mask) + // result: (VPAVGWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMINSB256 { + if v_0.Op != OpAMD64VPAVGW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMINSBMasked256) + v.reset(OpAMD64VPAVGWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPMINUB256 x y) mask) - // result: (VPMINUBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) + // result: (VPBROADCASTWMasked256 x mask) for { - if v_0.Op != OpAMD64VPMINUB256 { + if v_0.Op != OpAMD64VPBROADCASTW256 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMINUBMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPBROADCASTWMasked256) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPOPCNTB256 x) mask) - // result: (VPOPCNTBMasked256 x mask) + // match: (VMOVDQU16Masked256 (VPERMI2W256 x y z) mask) + // result: (VPERMI2WMasked256 x y z mask) for { - if v_0.Op 
!= OpAMD64VPOPCNTB256 { + if v_0.Op != OpAMD64VPERMI2W256 { break } + z := v_0.Args[2] x := v_0.Args[0] + y := v_0.Args[1] mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v.AddArg2(x, mask) + v.reset(OpAMD64VPERMI2WMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU8Masked256 (VPERMB256 x y) mask) - // result: (VPERMBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMADDWD256 x y) mask) + // result: (VPMADDWDMasked256 x y mask) for { - if v_0.Op != OpAMD64VPERMB256 { + if v_0.Op != OpAMD64VPMADDWD256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPERMBMasked256) + v.reset(OpAMD64VPMADDWDMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPSHUFB256 x y) mask) - // result: (VPSHUFBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMADDUBSW256 x y) mask) + // result: (VPMADDUBSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPSHUFB256 { + if v_0.Op != OpAMD64VPMADDUBSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSHUFBMasked256) + v.reset(OpAMD64VPMADDUBSWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked256 (VPSUBB256 x y) mask) - // result: (VPSUBBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMOVSXWQ256 x) mask) + // result: (VPMOVSXWQMasked256 x mask) for { - if v_0.Op != OpAMD64VPSUBB256 { + if v_0.Op != OpAMD64VPMOVSXWQ256 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBBMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPMOVSXWQMasked256) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPSUBSB256 x y) mask) - // result: (VPSUBSBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMOVZXWQ256 x) mask) + // result: (VPMOVZXWQMasked256 x mask) for { - if v_0.Op != OpAMD64VPSUBSB256 { + if v_0.Op != OpAMD64VPMOVZXWQ256 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBSBMasked256) - v.AddArg3(x, y, mask) + 
v.reset(OpAMD64VPMOVZXWQMasked256) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked256 (VPSUBUSB256 x y) mask) - // result: (VPSUBUSBMasked256 x y mask) + // match: (VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) + // result: (VPMOVSXWDMasked256 x mask) for { - if v_0.Op != OpAMD64VPSUBUSB256 { + if v_0.Op != OpAMD64VPMOVSXWD256 { break } - y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBUSBMasked256) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPMOVSXWDMasked256) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQU8Masked512 (VPABSB512 x) mask) - // result: (VPABSBMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) + // result: (VPMOVZXWDMasked256 x mask) for { - if v_0.Op != OpAMD64VPABSB512 { + if v_0.Op != OpAMD64VPMOVZXWD256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPABSBMasked512) + v.reset(OpAMD64VPMOVZXWDMasked256) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPADDB512 x y) mask) - // result: (VPADDBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPMAXSW256 x y) mask) + // result: (VPMAXSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPADDB512 { + if v_0.Op != OpAMD64VPMAXSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPADDBMasked512) + v.reset(OpAMD64VPMAXSWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPADDSB512 x y) mask) - // result: (VPADDSBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPMAXUW256 x y) mask) + // result: (VPMAXUWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPADDSB512 { + if v_0.Op != OpAMD64VPMAXUW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPADDSBMasked512) + v.reset(OpAMD64VPMAXUWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPADDUSB512 x y) mask) - // result: (VPADDUSBMasked512 x y 
mask) + // match: (VMOVDQU16Masked256 (VPMINSW256 x y) mask) + // result: (VPMINSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPADDUSB512 { + if v_0.Op != OpAMD64VPMINSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPADDUSBMasked512) + v.reset(OpAMD64VPMINSWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPAVGB512 x y) mask) - // result: (VPAVGBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPMINUW256 x y) mask) + // result: (VPMINUWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPAVGB512 { + if v_0.Op != OpAMD64VPMINUW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPAVGBMasked512) + v.reset(OpAMD64VPMINUWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) - // result: (VPBROADCASTBMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPMULHW256 x y) mask) + // result: (VPMULHWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPBROADCASTB512 { + if v_0.Op != OpAMD64VPMULHW256 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPMULHWMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPERMI2B512 x y z) mask) - // result: (VPERMI2BMasked512 x y z mask) + // match: (VMOVDQU16Masked256 (VPMULHUW256 x y) mask) + // result: (VPMULHUWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPERMI2B512 { + if v_0.Op != OpAMD64VPMULHUW256 { break } - z := v_0.Args[2] - x := v_0.Args[0] y := v_0.Args[1] + x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPERMI2BMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPMULHUWMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPALIGNR512 [a] x y) mask) - // result: (VPALIGNRMasked512 [a] x y mask) + // match: (VMOVDQU16Masked256 (VPMULLW256 x y) mask) + // result: (VPMULLWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPALIGNR512 { + if v_0.Op != 
OpAMD64VPMULLW256 { break } - a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPALIGNRMasked512) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMULLWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) - // result: (VPMOVSXBQMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPOPCNTW256 x) mask) + // result: (VPOPCNTWMasked256 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBQ512 { + if v_0.Op != OpAMD64VPOPCNTW256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked512) + v.reset(OpAMD64VPOPCNTWMasked256) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) - // result: (VPMOVZXBQMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPERMW256 x y) mask) + // result: (VPERMWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMOVZXBQ512 { + if v_0.Op != OpAMD64VPERMW256 { break } + y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPERMWMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) - // result: (VPMOVSXBWMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) + // result: (VPMOVSWBMasked128_256 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBW512 { + if v_0.Op != OpAMD64VPMOVSWB128_256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBWMasked512) + v.reset(OpAMD64VPMOVSWBMasked128_256) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) - // result: (VPMOVSXBDMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) + // result: (VPMOVSWBMasked256 x mask) for { - if v_0.Op != OpAMD64VPMOVSXBD512 { + if v_0.Op != OpAMD64VPMOVSWB256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSXBDMasked512) + v.reset(OpAMD64VPMOVSWBMasked256) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) 
- // result: (VPMOVZXBWMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) + // result: (VPMOVUSWBMasked128_256 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBW512 { + if v_0.Op != OpAMD64VPMOVUSWB128_256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBWMasked512) + v.reset(OpAMD64VPMOVUSWBMasked128_256) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) - // result: (VPMOVZXBDMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) + // result: (VPMOVUSWBMasked256 x mask) for { - if v_0.Op != OpAMD64VPMOVZXBD512 { + if v_0.Op != OpAMD64VPMOVUSWB256 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVZXBDMasked512) + v.reset(OpAMD64VPMOVUSWBMasked256) v.AddArg2(x, mask) return true } - // match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) - // result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask) + // match: (VMOVDQU16Masked256 (VPSHLDW256 [a] x y) mask) + // result: (VPSHLDWMasked256 [a] x y mask) for { - if v_0.Op != OpAMD64VGF2P8AFFINEINVQB512 { + if v_0.Op != OpAMD64VPSHLDW256 { break } a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) + v.reset(OpAMD64VPSHLDWMasked256) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VGF2P8AFFINEQB512 [a] x y) mask) - // result: (VGF2P8AFFINEQBMasked512 [a] x y mask) + // match: (VMOVDQU16Masked256 (VPSLLW256 x y) mask) + // result: (VPSLLWMasked256 x y mask) for { - if v_0.Op != OpAMD64VGF2P8AFFINEQB512 { + if v_0.Op != OpAMD64VPSLLW256 { break } - a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEQBMasked512) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSLLWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VGF2P8MULB512 x y) mask) - // result: (VGF2P8MULBMasked512 x y mask) + // match: (VMOVDQU16Masked256 
(VPSHRDW256 [a] x y) mask) + // result: (VPSHRDWMasked256 [a] x y mask) for { - if v_0.Op != OpAMD64VGF2P8MULB512 { + if v_0.Op != OpAMD64VPSHRDW256 { break } + a := auxIntToUint8(v_0.AuxInt) y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VGF2P8MULBMasked512) + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPMAXSB512 x y) mask) - // result: (VPMAXSBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSRAW256 x y) mask) + // result: (VPSRAWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMAXSB512 { + if v_0.Op != OpAMD64VPSRAW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXSBMasked512) + v.reset(OpAMD64VPSRAWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPMAXUB512 x y) mask) - // result: (VPMAXUBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSRLW256 x y) mask) + // result: (VPSRLWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMAXUB512 { + if v_0.Op != OpAMD64VPSRLW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMAXUBMasked512) + v.reset(OpAMD64VPSRLWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPMINSB512 x y) mask) - // result: (VPMINSBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSHLDVW256 x y z) mask) + // result: (VPSHLDVWMasked256 x y z mask) for { - if v_0.Op != OpAMD64VPMINSB512 { + if v_0.Op != OpAMD64VPSHLDVW256 { break } - y := v_0.Args[1] + z := v_0.Args[2] x := v_0.Args[0] + y := v_0.Args[1] mask := v_1 - v.reset(OpAMD64VPMINSBMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHLDVWMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU8Masked512 (VPMINUB512 x y) mask) - // result: (VPMINUBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSLLVW256 x y) mask) + // result: (VPSLLVWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPMINUB512 { + if v_0.Op != OpAMD64VPSLLVW256 { break 
} y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMINUBMasked512) + v.reset(OpAMD64VPSLLVWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPOPCNTB512 x) mask) - // result: (VPOPCNTBMasked512 x mask) + // match: (VMOVDQU16Masked256 (VPSHRDVW256 x y z) mask) + // result: (VPSHRDVWMasked256 x y z mask) for { - if v_0.Op != OpAMD64VPOPCNTB512 { + if v_0.Op != OpAMD64VPSHRDVW256 { break } + z := v_0.Args[2] x := v_0.Args[0] + y := v_0.Args[1] mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSHRDVWMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VMOVDQU8Masked512 (VPERMB512 x y) mask) - // result: (VPERMBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSRAVW256 x y) mask) + // result: (VPSRAVWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPERMB512 { + if v_0.Op != OpAMD64VPSRAVW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPERMBMasked512) + v.reset(OpAMD64VPSRAVWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPSHUFB512 x y) mask) - // result: (VPSHUFBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSRLVW256 x y) mask) + // result: (VPSRLVWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPSHUFB512 { + if v_0.Op != OpAMD64VPSRLVW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSHUFBMasked512) + v.reset(OpAMD64VPSRLVWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPSUBB512 x y) mask) - // result: (VPSUBBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSUBW256 x y) mask) + // result: (VPSUBWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPSUBB512 { + if v_0.Op != OpAMD64VPSUBW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBBMasked512) + v.reset(OpAMD64VPSUBWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPSUBSB512 x y) mask) - // result: (VPSUBSBMasked512 x y mask) 
+ // match: (VMOVDQU16Masked256 (VPSUBSW256 x y) mask) + // result: (VPSUBSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPSUBSB512 { + if v_0.Op != OpAMD64VPSUBSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBSBMasked512) + v.reset(OpAMD64VPSUBSWMasked256) v.AddArg3(x, y, mask) return true } - // match: (VMOVDQU8Masked512 (VPSUBUSB512 x y) mask) - // result: (VPSUBUSBMasked512 x y mask) + // match: (VMOVDQU16Masked256 (VPSUBUSW256 x y) mask) + // result: (VPSUBUSWMasked256 x y mask) for { - if v_0.Op != OpAMD64VPSUBUSB512 { + if v_0.Op != OpAMD64VPSUBUSW256 { break } y := v_0.Args[1] x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPSUBUSBMasked512) + v.reset(OpAMD64VPSUBUSWMasked256) v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQUload128 [off1] {sym} x:(ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (VMOVDQUload128 [off1+off2] {sym} ptr mem) + // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) + // result: (VPMOVWBMasked128_256 x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(x.AuxInt) - ptr := x.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + if v_0.Op != OpAMD64VPMOVWB128_256 { break } - v.reset(OpAMD64VMOVDQUload128) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VMOVDQUload128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (VMOVDQUload128 [off1+off2] {mergeSym(sym1, sym2)} base mem) + // match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask) + // result: (VPMOVWBMasked256 x mask) for { - off1 := 
auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(x.AuxInt) - sym2 := auxToSym(x.Aux) - base := x.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + if v_0.Op != OpAMD64VPMOVWB256 { break } - v.reset(OpAMD64VMOVDQUload128) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked256) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQUload256 [off1] {sym} x:(ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (VMOVDQUload256 [off1+off2] {sym} ptr mem) + // match: (VMOVDQU16Masked256 (VPSHUFHW256 [a] x) mask) + // result: (VPSHUFHWMasked256 [a] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(x.AuxInt) - ptr := x.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + if v_0.Op != OpAMD64VPSHUFHW256 { break } - v.reset(OpAMD64VMOVDQUload256) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFHWMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VMOVDQUload256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (VMOVDQUload256 [off1+off2] {mergeSym(sym1, sym2)} base mem) + // match: (VMOVDQU16Masked256 (VPSHUFLW256 [a] x) mask) + // result: (VPSHUFLWMasked256 [a] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(x.AuxInt) - sym2 := auxToSym(x.Aux) - 
base := x.Args[0] - mem := v_1 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + if v_0.Op != OpAMD64VPSHUFLW256 { break } - v.reset(OpAMD64VMOVDQUload256) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFLWMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQUload512 [off1] {sym} x:(ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (VMOVDQUload512 [off1+off2] {sym} ptr mem) + // match: (VMOVDQU16Masked256 (VPSLLW256const [a] x) mask) + // result: (VPSLLWMasked256const [a] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(x.AuxInt) - ptr := x.Args[0] - mem := v_1 - if !(is32Bit(int64(off1) + int64(off2))) { + if v_0.Op != OpAMD64VPSLLW256const { break } - v.reset(OpAMD64VMOVDQUload512) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VMOVDQUload512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (VMOVDQUload512 [off1+off2] {mergeSym(sym1, sym2)} base mem) + // match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) + // result: (VPSRLWMasked256const [a] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64VPSRLW256const { break } - off2 := auxIntToInt32(x.AuxInt) - sym2 := auxToSym(x.Aux) - base := x.Args[0] - mem := v_1 - if 
!(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) + // result: (VPSRAWMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRAW256const { break } - v.reset(OpAMD64VMOVDQUload512) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAWMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQUstore128 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (VMOVDQUstore128 [off1+off2] {sym} ptr val mem) + // match: (VMOVDQU16Masked512 (VPABSW512 x) mask) + // result: (VPABSWMasked512 x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(x.AuxInt) - ptr := x.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + if v_0.Op != OpAMD64VPABSW512 { break } - v.reset(OpAMD64VMOVDQUstore128) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSWMasked512) + v.AddArg2(x, mask) return true } - // match: (VMOVDQUstore128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (VMOVDQUstore128 [off1+off2] {mergeSym(sym1, sym2)} base val mem) + // match: (VMOVDQU16Masked512 (VPADDW512 x y) mask) + // result: 
(VPADDWMasked512 x y mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(x.AuxInt) - sym2 := auxToSym(x.Aux) - base := x.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + if v_0.Op != OpAMD64VPADDW512 { break } - v.reset(OpAMD64VMOVDQUstore128) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQUstore256 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (VMOVDQUstore256 [off1+off2] {sym} ptr val mem) + // match: (VMOVDQU16Masked512 (VPADDSW512 x y) mask) + // result: (VPADDSWMasked512 x y mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(x.AuxInt) - ptr := x.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + if v_0.Op != OpAMD64VPADDSW512 { break } - v.reset(OpAMD64VMOVDQUstore256) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDSWMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VMOVDQUstore256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (VMOVDQUstore256 [off1+off2] {mergeSym(sym1, sym2)} base val mem) + // match: (VMOVDQU16Masked512 (VPADDUSW512 x y) mask) + // result: (VPADDUSWMasked512 x y mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if x.Op != 
OpAMD64LEAQ { + if v_0.Op != OpAMD64VPADDUSW512 { break } - off2 := auxIntToInt32(x.AuxInt) - sym2 := auxToSym(x.Aux) - base := x.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDUSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPAVGW512 x y) mask) + // result: (VPAVGWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPAVGW512 { break } - v.reset(OpAMD64VMOVDQUstore256) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPAVGWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMOVDQUstore512 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (VMOVDQUstore512 [off1+off2] {sym} ptr val mem) + // match: (VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) + // result: (VPBROADCASTWMasked512 x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64VPBROADCASTW512 { break } - off2 := auxIntToInt32(x.AuxInt) - ptr := x.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTWMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPERMI2W512 x y z) mask) + // result: (VPERMI2WMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2W512 { break } - v.reset(OpAMD64VMOVDQUstore512) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(ptr, val, mem) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2WMasked512) + v.AddArg4(x, y, z, 
mask) return true } - // match: (VMOVDQUstore512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (VMOVDQUstore512 [off1+off2] {mergeSym(sym1, sym2)} base val mem) + // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) + // result: (VPMADDWDMasked512 x y mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - x := v_0 - if x.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64VPMADDWD512 { break } - off2 := auxIntToInt32(x.AuxInt) - sym2 := auxToSym(x.Aux) - base := x.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64VMOVDQUstore512) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMADDWDMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VMOVQ x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVQload [off] {sym} ptr mem) + // match: (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) + // result: (VPMADDUBSWMasked512 x y mask) for { - x := v_0 - if x.Op != OpAMD64MOVQload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + if v_0.Op != OpAMD64VPMADDUBSW512 { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVQload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMADDUBSWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVSDf2v(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VMOVSDf2v x:(MOVSDload [off] {sym} ptr 
mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVSDload [off] {sym} ptr mem) + // match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) + // result: (VPMOVSXWDMasked512 x mask) for { - x := v_0 - if x.Op != OpAMD64MOVSDload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + if v_0.Op != OpAMD64VPMOVSXWD512 { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVSDload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXWDMasked512) + v.AddArg2(x, mask) return true } - // match: (VMOVSDf2v x:(MOVSDconst [c] )) - // result: (VMOVSDconst [c] ) + // match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) + // result: (VPMOVSXWQMasked512 x mask) for { - x := v_0 - if x.Op != OpAMD64MOVSDconst { + if v_0.Op != OpAMD64VPMOVSXWQ512 { break } - c := auxIntToFloat64(x.AuxInt) - v.reset(OpAMD64VMOVSDconst) - v.AuxInt = float64ToAuxInt(c) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXWQMasked512) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMOVSSf2v(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVSSload [off] {sym} ptr mem) + // match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) + // result: (VPMOVZXWDMasked512 x mask) for { - x := v_0 - if x.Op != OpAMD64MOVSSload { - break - } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + if v_0.Op != OpAMD64VPMOVZXWD512 { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVSSload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWDMasked512) + 
v.AddArg2(x, mask) return true } - // match: (VMOVSSf2v x:(MOVSSconst [c] )) - // result: (VMOVSSconst [c] ) + // match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) + // result: (VPMOVZXWQMasked512 x mask) for { - x := v_0 - if x.Op != OpAMD64MOVSSconst { + if v_0.Op != OpAMD64VPMOVZXWQ512 { break } - c := auxIntToFloat32(x.AuxInt) - v.reset(OpAMD64VMOVSSconst) - v.AuxInt = float32ToAuxInt(c) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWQMasked512) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPD512load {sym} [off] x ptr mem) + // match: (VMOVDQU16Masked512 (VPMAXSW512 x y) mask) + // result: (VPMAXSWMasked512 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if v_0.Op != OpAMD64VPMAXSW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPDMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU16Masked512 (VPMAXUW512 x y) mask) + // result: (VPMAXUWMasked512 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != 
OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMAXUW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPDMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU16Masked512 (VPMINSW512 x y) mask) + // result: (VPMINSWMasked512 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMINSW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPDMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU16Masked512 (VPMINUW512 x y) mask) + // result: (VPMINUWMasked512 x y mask) for { - for 
_i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMINUW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPS512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPS512load {sym} [off] x ptr mem) + // match: (VMOVDQU16Masked512 (VPMULHW512 x y) mask) + // result: (VPMULHWMasked512 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if v_0.Op != OpAMD64VPMULHW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULHWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPSMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPSMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU16Masked512 (VPMULHUW512 x y) mask) + // result: (VPMULHUWMasked512 x y 
mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMULHUW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULHUWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPSMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU16Masked512 (VPMULLW512 x y) mask) + // result: (VPMULLWMasked512 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMULLW512 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLWMasked512) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VMULPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VMULPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VMULPSMasked512load {sym} [off] x ptr mask mem) 
+ // match: (VMOVDQU16Masked512 (VPOPCNTW512 x) mask) + // result: (VPOPCNTWMasked512 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VMULPSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPOPCNTW512 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTWMasked512) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSD512load {sym} [off] ptr mem) + // match: (VMOVDQU16Masked512 (VPERMW512 x y) mask) + // result: (VPERMWMasked512 x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VPERMW512 { break } - v.reset(OpAMD64VPABSD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPABSDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSDMasked128load {sym} [off] ptr mask mem) + // match: (VMOVDQU16Masked512 (VPSHLDW512 [a] x y) mask) + // result: (VPSHLDWMasked512 [a] x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != 
OpAMD64VPSHLDW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSLLW512 x y) mask) + // result: (VPSLLWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLW512 { break } - v.reset(OpAMD64VPABSDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPABSDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSDMasked256load {sym} [off] ptr mask mem) + // match: (VMOVDQU16Masked512 (VPSHRDW512 [a] x y) mask) + // result: (VPSHRDWMasked512 [a] x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPSHRDW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRAW512 x y) mask) + // result: (VPSRAWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAW512 { break } - v.reset(OpAMD64VPABSDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPABSDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPABSDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSDMasked512load {sym} [off] ptr mask mem) + // match: (VMOVDQU16Masked512 (VPSRLW512 x y) mask) + // result: (VPSRLWMasked512 x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPSRLW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_0.Args[1] + x := v_0.Args[0] mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSRLWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHLDVW512 x y z) mask) + // result: (VPSHLDVWMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHLDVW512 { break } - v.reset(OpAMD64VPABSDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVWMasked512) + v.AddArg4(x, y, z, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSQ128load {sym} [off] ptr mem) + // match: (VMOVDQU16Masked512 (VPSLLVW512 x y) mask) + // result: (VPSLLVWMasked512 x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64VPSLLVW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHRDVW512 x y z) mask) + // result: (VPSHRDVWMasked512 x y z mask) + for { + if v_0.Op != 
OpAMD64VPSHRDVW512 { break } - v.reset(OpAMD64VPABSQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVWMasked512) + v.AddArg4(x, y, z, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSQ256load {sym} [off] ptr mem) + // match: (VMOVDQU16Masked512 (VPSRAVW512 x y) mask) + // result: (VPSRAVWMasked512 x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPSRAVW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRLVW512 x y) mask) + // result: (VPSRLVWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLVW512 { break } - v.reset(OpAMD64VPABSQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPABSQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSQ512load {sym} [off] ptr mem) + // match: (VMOVDQU16Masked512 (VPSUBW512 x y) mask) + // result: (VPSUBWMasked512 x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPSUBW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] 
+ mask := v_1 + v.reset(OpAMD64VPSUBWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSUBSW512 x y) mask) + // result: (VPSUBSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBSW512 { break } - v.reset(OpAMD64VPABSQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBSWMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPABSQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSQMasked128load {sym} [off] ptr mask mem) + // match: (VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) + // result: (VPSUBUSWMasked512 x y mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64VPSUBUSW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_0.Args[1] + x := v_0.Args[0] mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSUBUSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) + // result: (VPSHUFHWMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VPSHUFHW512 { break } - v.reset(OpAMD64VPABSQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFHWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPABSQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSQMasked256load {sym} [off] ptr mask mem) + // match: 
(VMOVDQU16Masked512 (VPSHUFLW512 [a] x) mask) + // result: (VPSHUFLWMasked512 [a] x mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPSHUFLW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSHUFLWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSLLW512const [a] x) mask) + // result: (VPSLLWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSLLW512const { break } - v.reset(OpAMD64VPABSQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPABSQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPABSQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPABSQMasked512load {sym} [off] ptr mask mem) + // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512const [a] x mask) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPSRLW512const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) + // result: (VPSRAWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRAW512const { break } - v.reset(OpAMD64VPABSQMasked512load) - v.AuxInt = 
int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VPACKSSDW512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKSSDW512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPABSD128 x) mask) + // result: (VPABSDMasked128 x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VPABSD128 { break } - v.reset(OpAMD64VPACKSSDW512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSDMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKSSDWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VADDPS128 x y) mask) + // result: (VADDPSMasked128 x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64VADDPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VADDPSMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 
(VPADDD128 x y) mask) + // result: (VPADDDMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPADDD128 { break } - v.reset(OpAMD64VPACKSSDWMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDDMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKSSDWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKSSDWMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) + // result: (VBROADCASTSSMasked128 x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VBROADCASTSS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSSMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) + // result: (VPBROADCASTDMasked128 x mask) + for { + if v_0.Op != OpAMD64VPBROADCASTD128 { break } - v.reset(OpAMD64VPACKSSDWMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTDMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKSSDWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKSSDWMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) + // result: (VRNDSCALEPSMasked128 [a] x 
mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VRNDSCALEPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VREDUCEPS128 [a] x) mask) + // result: (VREDUCEPSMasked128 [a] x mask) + for { + if v_0.Op != OpAMD64VREDUCEPS128 { break } - v.reset(OpAMD64VPACKSSDWMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKUSDW512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKUSDW512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPERMI2PS128 x y z) mask) + // result: (VPERMI2PSMasked128 x y z mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPERMI2PS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PSMasked128) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPERMI2D128 x y z) mask) + // result: (VPERMI2DMasked128 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2D128 { break } - v.reset(OpAMD64VPACKUSDW512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + z := v_0.Args[2] + x := v_0.Args[0] + y 
:= v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2DMasked128) + v.AddArg4(x, y, z, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKUSDWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VCVTDQ2PS128 x) mask) + // result: (VCVTDQ2PSMasked128 x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64VCVTDQ2PS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTDQ2PSMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VCVTUDQ2PS128 x) mask) + // result: (VCVTUDQ2PSMasked128 x mask) + for { + if v_0.Op != OpAMD64VCVTUDQ2PS128 { break } - v.reset(OpAMD64VPACKUSDWMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUDQ2PSMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKUSDWMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VCVTTPS2DQ128 x) mask) + // result: (VCVTTPS2DQMasked128 x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VCVTTPS2DQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + x := 
v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2DQMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked128 (VCVTTPS2UDQ128 x) mask) + // result: (VCVTTPS2UDQMasked128 x mask) + for { + if v_0.Op != OpAMD64VCVTTPS2UDQ128 { break } - v.reset(OpAMD64VPACKUSDWMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2UDQMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPACKUSDWMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VDIVPS128 x y) mask) + // result: (VDIVPSMasked128 x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VDIVPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPSMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) + // result: (VPMOVSXDQMasked128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXDQ128 { break } - v.reset(OpAMD64VPACKUSDWMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDD512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 
(VPMOVZXDQ128 x) mask) + // result: (VPMOVZXDQMasked128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if v_0.Op != OpAMD64VPMOVZXDQ128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked128) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDDMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPLZCNTD128 x) mask) + // result: (VPLZCNTDMasked128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPLZCNTD128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTDMasked128) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDDMasked256load {sym} [off] x ptr mask mem) + // 
match: (VMOVDQU32Masked128 (VMAXPS128 x y) mask) + // result: (VMAXPSMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VMAXPS128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPSMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDDMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPMAXSD128 x y) mask) + // result: (VPMAXSDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMAXSD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSDMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPADDQ512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPMAXUD128 x y) mask) + // result: (VPMAXUDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if v_0.Op != OpAMD64VPMAXUD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUDMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDQMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VMINPS128 x y) mask) + // result: (VMINPSMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VMINPS128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPSMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // 
cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDQMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPMINSD128 x y) mask) + // result: (VPMINSDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMINSD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSDMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPADDQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPADDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPADDQMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPMINUD128 x y) mask) + // result: (VPMINUDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPADDQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMINUD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUDMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPAND128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(VPAND128 x (VPMOVMToVec8x16 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU8Masked128 x k) + // match: (VMOVDQU32Masked128 (VFMADD213PS128 x y z) mask) + // result: (VFMADD213PSMasked128 x y z mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec8x16 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU8Masked128) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VFMADD213PS128 { + break } - break + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PSMasked128) + v.AddArg4(x, y, z, mask) + return true } - // match: (VPAND128 x (VPMOVMToVec16x8 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU16Masked128 x k) + // match: (VMOVDQU32Masked128 (VFMADDSUB213PS128 x y z) mask) + // result: (VFMADDSUB213PSMasked128 x y z mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec16x8 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU16Masked128) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VFMADDSUB213PS128 { + break } - break + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PSMasked128) + v.AddArg4(x, y, z, mask) + return true } - // match: (VPAND128 x (VPMOVMToVec32x4 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU32Masked128 x k) + // match: (VMOVDQU32Masked128 (VMULPS128 x y) mask) + // result: (VMULPSMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec32x4 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU32Masked128) - v.AddArg2(x, k) - return true + 
if v_0.Op != OpAMD64VMULPS128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPSMasked128) + v.AddArg3(x, y, mask) + return true } - // match: (VPAND128 x (VPMOVMToVec64x2 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU64Masked128 x k) + // match: (VMOVDQU32Masked128 (VPMULLD128 x y) mask) + // result: (VPMULLDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec64x2 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU64Masked128) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPMULLD128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPAND256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPAND256 x (VPMOVMToVec8x32 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU8Masked256 x k) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec8x32 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU8Masked256) - v.AddArg2(x, k) - return true - } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLDMasked128) + v.AddArg3(x, y, mask) + return true } - // match: (VPAND256 x (VPMOVMToVec16x16 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU16Masked256 x k) + // match: (VMOVDQU32Masked128 (VFMSUBADD213PS128 x y z) mask) + // result: (VFMSUBADD213PSMasked128 x y z mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec16x16 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU16Masked256) - v.AddArg2(x, k) - return true + if v_0.Op != 
OpAMD64VFMSUBADD213PS128 { + break } - break + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PSMasked128) + v.AddArg4(x, y, z, mask) + return true } - // match: (VPAND256 x (VPMOVMToVec32x8 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU32Masked256 x k) + // match: (VMOVDQU32Masked128 (VPOPCNTD128 x) mask) + // result: (VPOPCNTDMasked128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec32x8 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU32Masked256) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPOPCNTD128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked128) + v.AddArg2(x, mask) + return true } - // match: (VPAND256 x (VPMOVMToVec64x4 k)) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMOVDQU64Masked256 x k) + // match: (VMOVDQU32Masked128 (VPROLD128 [a] x) mask) + // result: (VPROLDMasked128 [a] x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec64x4 { - continue - } - k := v_1.Args[0] - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - continue - } - v.reset(OpAMD64VMOVDQU64Masked256) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPROLD128 { + break } - break + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDD512 x (VPMOVMToVec64x8 k)) - // result: (VMOVDQU64Masked512 x k) + // match: (VMOVDQU32Masked128 (VPRORD128 [a] x) mask) + // result: (VPRORDMasked128 [a] x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if 
v_1.Op != OpAMD64VPMOVMToVec64x8 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU64Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPRORD128 { + break } - break + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true } - // match: (VPANDD512 x (VPMOVMToVec32x16 k)) - // result: (VMOVDQU32Masked512 x k) + // match: (VMOVDQU32Masked128 (VPROLVD128 x y) mask) + // result: (VPROLVDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec32x16 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU32Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPROLVD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVDMasked128) + v.AddArg3(x, y, mask) + return true } - // match: (VPANDD512 x (VPMOVMToVec16x32 k)) - // result: (VMOVDQU16Masked512 x k) + // match: (VMOVDQU32Masked128 (VPRORVD128 x y) mask) + // result: (VPRORVDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec16x32 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU16Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPRORVD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVDMasked128) + v.AddArg3(x, y, mask) + return true } - // match: (VPANDD512 x (VPMOVMToVec8x64 k)) - // result: (VMOVDQU8Masked512 x k) + // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) + // result: (VPMOVSDBMasked128_128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec8x64 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU8Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPMOVSDB128_128 { + break } - break + x := 
v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_128) + v.AddArg2(x, mask) + return true } - // match: (VPANDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDD512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) + // result: (VPACKSSDWMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if v_0.Op != OpAMD64VPACKSSDW128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKSSDWMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDDMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) + // result: (VPMOVSDWMasked128_128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMOVSDW128_128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked128_128) + v.AddArg2(x, mask) + return 
true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDDMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) + // result: (VPMOVUSDBMasked128_128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPMOVUSDB128_128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_128) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDDMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) + // result: (VPACKUSDWMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPACKUSDW128 { + break } - break + y := v_0.Args[1] + x := 
v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked128) + v.AddArg3(x, y, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDND512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDND512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDND512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) + // result: (VPMOVUSDWMasked128_128 x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VPMOVUSDW128_128 { break } - v.reset(OpAMD64VPANDND512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked128_128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDNDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNDMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VSCALEFPS128 x y) mask) + // result: (VSCALEFPSMasked128 x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VSCALEFPS128 { break } - v.reset(OpAMD64VPANDNDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPSMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNDMasked256(v *Value) 
bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDNDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNDMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSHLDD128 [a] x y) mask) + // result: (VPSHLDDMasked128 [a] x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPSHLDD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPSLLD128 x y) mask) + // result: (VPSLLDMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPSLLD128 { break } - v.reset(OpAMD64VPANDNDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDNDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNDMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSHRDD128 [a] x y) mask) + // result: (VPSHRDDMasked128 [a] x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPSHRDD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPSRAD128 x y) mask) + // result: (VPSRADMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPSRAD128 { break } - v.reset(OpAMD64VPANDNDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDNQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNQ512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPSRLD128 x y) mask) + // result: (VPSRLDMasked128 x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPSRLD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPANDNQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNQMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSHLDVD128 x y z) mask) + // result: (VPSHLDVDMasked128 x y z mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if v_0.Op != OpAMD64VPSHLDVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - 
mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVDMasked128) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPSLLVD128 x y) mask) + // result: (VPSLLVDMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPSLLVD128 { break } - v.reset(OpAMD64VPANDNQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVDMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNQMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSHRDVD128 x y z) mask) + // result: (VPSHRDVDMasked128 x y z mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if v_0.Op != OpAMD64VPSHRDVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVDMasked128) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked128 (VPSRAVD128 x y) mask) + // result: (VPSRAVDMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPSRAVD128 { break } - v.reset(OpAMD64VPANDNQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVDMasked128) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDNQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDNQMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSRLVD128 x y) mask) + // result: (VPSRLVDMasked128 x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + if v_0.Op != OpAMD64VPSRLVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVDMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked128 (VSQRTPS128 x) mask) + // result: (VSQRTPSMasked128 x mask) + for { + if v_0.Op != OpAMD64VSQRTPS128 { break } - v.reset(OpAMD64VPANDNQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDQ512 x (VPMOVMToVec64x8 k)) - // result: (VMOVDQU64Masked512 x k) + // match: (VMOVDQU32Masked128 (VSUBPS128 x y) mask) + // result: (VSUBPSMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec64x8 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU64Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VSUBPS128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPSMasked128) + v.AddArg3(x, y, mask) + return true } - // match: (VPANDQ512 x (VPMOVMToVec32x16 k)) - // result: (VMOVDQU32Masked512 x k) + // match: (VMOVDQU32Masked128 (VPSUBD128 x y) mask) + // result: (VPSUBDMasked128 x y mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != 
OpAMD64VPMOVMToVec32x16 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU32Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPSUBD128 { + break } - break + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBDMasked128) + v.AddArg3(x, y, mask) + return true } - // match: (VPANDQ512 x (VPMOVMToVec16x32 k)) - // result: (VMOVDQU16Masked512 x k) + // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) + // result: (VPMOVDBMasked128_128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec16x32 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU16Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPMOVDB128_128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_128) + v.AddArg2(x, mask) + return true } - // match: (VPANDQ512 x (VPMOVMToVec8x64 k)) - // result: (VMOVDQU8Masked512 x k) + // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) + // result: (VPMOVDWMasked128_128 x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec8x64 { - continue - } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU8Masked512) - v.AddArg2(x, k) - return true + if v_0.Op != OpAMD64VPMOVDW128_128 { + break } - break + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked128_128) + v.AddArg2(x, mask) + return true } - // match: (VPANDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDQ512load {sym} [off] x ptr mem) + // match: (VMOVDQU32Masked128 (VPSHUFD128 [a] x) mask) + // result: (VPSHUFDMasked128 [a] x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - 
} - v.reset(OpAMD64VPANDQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + if v_0.Op != OpAMD64VPSHUFD128 { + break } - break + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFDMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDQMasked128load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSLLD128const [a] x) mask) + // result: (VPSLLDMasked128const [a] x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPSLLD128const { + break } - break + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDQMasked256load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) + // result: (VPSRLDMasked128const [a] x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != 
OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPSRLD128const { + break } - break + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPANDQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPANDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPANDQMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask) + // result: (VPSRADMasked128const [a] x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPANDQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + if v_0.Op != OpAMD64VPSRAD128const { + break } - break + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true } return false } -func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) - // result: (VGF2P8MULBMasked512Merging dst x y mask) 
+ // match: (VMOVDQU32Masked256 (VPABSD256 x) mask) + // result: (VPABSDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VGF2P8MULB512 { + if v_0.Op != OpAMD64VPABSD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask) - // result: (VPABSBMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VADDPS256 x y) mask) + // result: (VADDPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSB512 { + if v_0.Op != OpAMD64VADDPS256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPABSBMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VADDPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDB512 x y) mask) - // result: (VPADDBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPADDD256 x y) mask) + // result: (VPADDDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDB512 { + if v_0.Op != OpAMD64VPADDD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDBMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) - // result: (VPADDSBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) + // result: (VBROADCASTSSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDSB512 { + if v_0.Op != OpAMD64VBROADCASTSS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSSMasked256) + 
v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) - // result: (VPADDUSBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) + // result: (VPBROADCASTDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDUSB512 { + if v_0.Op != OpAMD64VPBROADCASTD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDUSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPALIGNR512 [a] x y) mask) - // result: (VPALIGNRMasked512Merging dst [a] x y mask) + // match: (VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) + // result: (VRNDSCALEPSMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPALIGNR512 { + if v_0.Op != OpAMD64VRNDSCALEPS256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPALIGNRMasked512Merging) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) - // result: (VPAVGBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VREDUCEPS256 [a] x) mask) + // result: (VREDUCEPSMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPAVGB512 { + if v_0.Op != OpAMD64VREDUCEPS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPAVGBMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) - // result: (VPMAXSBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPERMI2PS256 x 
y z) mask) + // result: (VPERMI2PSMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSB512 { + if v_0.Op != OpAMD64VPERMI2PS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PSMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) - // result: (VPMAXUBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPERMI2D256 x y z) mask) + // result: (VPERMI2DMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUB512 { + if v_0.Op != OpAMD64VPERMI2D256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2DMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) - // result: (VPMINSBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTDQ2PS256 x) mask) + // result: (VCVTDQ2PSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSB512 { + if v_0.Op != OpAMD64VCVTDQ2PS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTDQ2PSMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) - // result: (VPMINUBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTUDQ2PS256 x) mask) + // result: (VCVTUDQ2PSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUB512 { + if v_0.Op != OpAMD64VCVTUDQ2PS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINUBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + 
mask := v_1 + v.reset(OpAMD64VCVTUDQ2PSMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) - // result: (VPOPCNTBMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VCVTPS2PD256 x) mask) + // result: (VCVTPS2PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB512 { + if v_0.Op != OpAMD64VCVTPS2PD256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPOPCNTBMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTPS2PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) - // result: (VPSHUFBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTDQ2PD256 x) mask) + // result: (VCVTDQ2PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFB512 { + if v_0.Op != OpAMD64VCVTDQ2PD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHUFBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTDQ2PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) - // result: (VPSUBBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTUDQ2PD256 x) mask) + // result: (VCVTUDQ2PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBB512 { + if v_0.Op != OpAMD64VCVTUDQ2PD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUDQ2PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) - // result: (VPSUBSBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTTPS2DQ256 x) mask) + // result: (VCVTTPS2DQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBSB512 { + if v_0.Op != OpAMD64VCVTTPS2DQ256 { break } - y := v_1.Args[1] - x := 
v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2DQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) - // result: (VPSUBUSBMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTTPS2QQ256 x) mask) + // result: (VCVTTPS2QQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBUSB512 { + if v_0.Op != OpAMD64VCVTTPS2QQ256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2QQMasked256) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask) - // result: (VADDPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VCVTTPS2UDQ256 x) mask) + // result: (VCVTTPS2UDQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VADDPS512 { + if v_0.Op != OpAMD64VCVTTPS2UDQ256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VADDPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2UDQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTDQ2PS512 x) mask) - // result: (VCVTDQ2PSMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VCVTTPS2UQQ256 x) mask) + // result: (VCVTTPS2UQQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTDQ2PS512 { + if v_0.Op != OpAMD64VCVTTPS2UQQ256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTDQ2PSMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2UQQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 
x) mask) - // result: (VCVTTPS2DQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VDIVPS256 x y) mask) + // result: (VDIVPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ512 { + if v_0.Op != OpAMD64VDIVPS256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTTPS2DQMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTTPS2UDQ512 x) mask) - // result: (VCVTTPS2UDQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPMOVSXDQ256 x) mask) + // result: (VPMOVSXDQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2UDQ512 { + if v_0.Op != OpAMD64VPMOVSXDQ256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTTPS2UDQMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTUDQ2PS512 x) mask) - // result: (VCVTUDQ2PSMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPMOVZXDQ256 x) mask) + // result: (VPMOVZXDQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUDQ2PS512 { + if v_0.Op != OpAMD64VPMOVZXDQ256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTUDQ2PSMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) - // result: (VDIVPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPLZCNTD256 x) mask) + // result: (VPLZCNTDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VDIVPS512 { + if v_0.Op != OpAMD64VPLZCNTD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VDIVPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPLZCNTDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) - // result: (VMAXPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VMAXPS256 x y) mask) + // result: (VMAXPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMAXPS512 { + if v_0.Op != OpAMD64VMAXPS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VMAXPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask) - // result: (VMINPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMAXSD256 x y) mask) + // result: (VPMAXSDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMINPS512 { + if v_0.Op != OpAMD64VPMAXSD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VMINPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask) - // result: (VMULPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMAXUD256 x y) mask) + // result: (VPMAXUDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMULPS512 { + if v_0.Op != OpAMD64VPMAXUD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VMULPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask) - // result: (VPABSDMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VMINPS256 x y) mask) + // result: (VMINPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSD512 { + if v_0.Op != OpAMD64VMINPS256 { 
break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPABSDMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) - // result: (VPACKSSDWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMINSD256 x y) mask) + // result: (VPMINSDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW512 { + if v_0.Op != OpAMD64VPMINSD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPACKSSDWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) - // result: (VPACKUSDWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMINUD256 x y) mask) + // result: (VPMINUDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW512 { + if v_0.Op != OpAMD64VPMINUD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPACKUSDWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask) - // result: (VPADDDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VFMADD213PS256 x y z) mask) + // result: (VFMADD213PSMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDD512 { + if v_0.Op != OpAMD64VFMADD213PS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PSMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask) 
- // result: (VPANDDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VFMADDSUB213PS256 x y z) mask) + // result: (VFMADDSUB213PSMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPANDD512 { + if v_0.Op != OpAMD64VFMADDSUB213PS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPANDDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PSMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) - // result: (VPLZCNTDMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VMULPS256 x y) mask) + // result: (VMULPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD512 { + if v_0.Op != OpAMD64VMULPS256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPLZCNTDMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) - // result: (VPMAXSDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMULLD256 x y) mask) + // result: (VPMULLDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSD512 { + if v_0.Op != OpAMD64VPMULLD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXSDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) - // result: (VPMAXUDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VFMSUBADD213PS256 x y z) mask) + // result: (VFMSUBADD213PSMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUD512 { + if v_0.Op != OpAMD64VFMSUBADD213PS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := 
v_2 - v.reset(OpAMD64VPMAXUDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PSMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) - // result: (VPMINSDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPOPCNTD256 x) mask) + // result: (VPOPCNTDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSD512 { + if v_0.Op != OpAMD64VPOPCNTD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINSDMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) - // result: (VPMINUDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPERMPS256 x y) mask) + // result: (VPERMPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUD512 { + if v_0.Op != OpAMD64VPERMPS256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINUDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) - // result: (VPMOVDBMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPERMD256 x y) mask) + // result: (VPERMDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVDB128_512 { + if v_0.Op != OpAMD64VPERMD256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVDBMasked128_512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) - // result: (VPMOVDWMasked256Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPROLD256 
[a] x) mask) + // result: (VPROLDMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVDW256 { + if v_0.Op != OpAMD64VPROLD256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVDWMasked256Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) - // result: (VPMOVSDBMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPRORD256 [a] x) mask) + // result: (VPRORDMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSDB128_512 { + if v_0.Op != OpAMD64VPRORD256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVSDBMasked128_512Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) - // result: (VPMOVSDWMasked256Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPROLVD256 x y) mask) + // result: (VPROLVDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSDW256 { + if v_0.Op != OpAMD64VPROLVD256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVSDWMasked256Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) - // result: (VPMOVUSDBMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPRORVD256 x y) mask) + // result: (VPRORVDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128_512 { + if v_0.Op != OpAMD64VPRORVD256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSDBMasked128_512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + 
mask := v_1 + v.reset(OpAMD64VPRORVDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) - // result: (VPMOVUSDWMasked256Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) + // result: (VPMOVSDBMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDW256 { + if v_0.Op != OpAMD64VPMOVSDB128_256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSDWMasked256Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) - // result: (VPMULLDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) + // result: (VPACKSSDWMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLD512 { + if v_0.Op != OpAMD64VPACKSSDW256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMULLDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKSSDWMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) - // result: (VPOPCNTDMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) + // result: (VPMOVSDWMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD512 { + if v_0.Op != OpAMD64VPMOVSDW128_256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPOPCNTDMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask) - // result: (VPORDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) + // result: (VPMOVSDWMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPORD512 { + if v_0.Op != OpAMD64VPMOVSDW256 { break } - y 
:= v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPORDMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) - // result: (VPROLDMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) + // result: (VPMOVUSDBMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLD512 { + if v_0.Op != OpAMD64VPMOVUSDB128_256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPROLDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) - // result: (VPROLVDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) + // result: (VPACKUSDWMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLVD512 { + if v_0.Op != OpAMD64VPACKUSDW256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPROLVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) - // result: (VPRORDMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) + // result: (VPMOVUSDWMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORD512 { + if v_0.Op != OpAMD64VPMOVUSDW128_256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPRORDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst 
(VPRORVD512 x y) mask) - // result: (VPRORVDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) + // result: (VPMOVUSDWMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORVD512 { + if v_0.Op != OpAMD64VPMOVUSDW256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPRORVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) - // result: (VPSHLDDMasked512Merging dst [a] x y mask) + // match: (VMOVDQU32Masked256 (VSCALEFPS256 x y) mask) + // result: (VSCALEFPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHLDD512 { + if v_0.Op != OpAMD64VSCALEFPS256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) - // result: (VPSHRDDMasked512Merging dst [a] x y mask) + // match: (VMOVDQU32Masked256 (VPSHLDD256 [a] x y) mask) + // result: (VPSHLDDMasked256 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHRDD512 { + if v_0.Op != OpAMD64VPSHLDD256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512Merging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDDMasked256) v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) - // result: (VPSHUFDMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPSLLD256 x y) mask) + // result: (VPSLLDMasked256 x y mask) 
for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFD512 { + if v_0.Op != OpAMD64VPSLLD256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHUFDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) - // result: (VPSLLDMasked512constMerging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPSHRDD256 [a] x y) mask) + // result: (VPSHRDDMasked256 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLD512const { + if v_0.Op != OpAMD64VPSHRDD256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512constMerging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDDMasked256) v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) - // result: (VPSLLVDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSRAD256 x y) mask) + // result: (VPSRADMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLVD512 { + if v_0.Op != OpAMD64VPSRAD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) - // result: (VPSRADMasked512constMerging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPSRLD256 x y) mask) + // result: (VPSRLDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAD512const { + if v_0.Op != OpAMD64VPSRLD256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - 
v.reset(OpAMD64VPSRADMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) - // result: (VPSRAVDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSHLDVD256 x y z) mask) + // result: (VPSHLDVDMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAVD512 { + if v_0.Op != OpAMD64VPSHLDVD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVDMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) - // result: (VPSRLDMasked512constMerging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPSLLVD256 x y) mask) + // result: (VPSLLVDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLD512const { + if v_0.Op != OpAMD64VPSLLVD256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRLDMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) - // result: (VPSRLVDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSHRDVD256 x y z) mask) + // result: (VPSHRDVDMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLVD512 { + if v_0.Op != OpAMD64VPSHRDVD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVDMasked256) + v.AddArg4(x, y, z, mask) return true 
} - // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) - // result: (VPSUBDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSRAVD256 x y) mask) + // result: (VPSRAVDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBD512 { + if v_0.Op != OpAMD64VPSRAVD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPXORD512 x y) mask) - // result: (VPXORDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSRLVD256 x y) mask) + // result: (VPSRLVDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPXORD512 { + if v_0.Op != OpAMD64VPSRLVD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPXORDMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) - // result: (VRCP14PSMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VSQRTPS256 x) mask) + // result: (VSQRTPSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRCP14PS512 { + if v_0.Op != OpAMD64VSQRTPS256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VRCP14PSMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) - // result: (VREDUCEPSMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VSUBPS256 x y) mask) + // result: (VSUBPSMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS512 { + if v_0.Op != OpAMD64VSUBPS256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - 
v.reset(OpAMD64VREDUCEPSMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPSMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) - // result: (VRNDSCALEPSMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked256 (VPSUBD256 x y) mask) + // result: (VPSUBDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS512 { + if v_0.Op != OpAMD64VPSUBD256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VRNDSCALEPSMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) - // result: (VRSQRT14PSMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) + // result: (VPMOVDBMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PS512 { + if v_0.Op != OpAMD64VPMOVDB128_256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VRSQRT14PSMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) - // result: (VSCALEFPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) + // result: (VPMOVDWMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSCALEFPS512 { + if v_0.Op != OpAMD64VPMOVDW128_256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) - // result: 
(VSQRTPSMasked512Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask) + // result: (VPMOVDWMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSQRTPS512 { + if v_0.Op != OpAMD64VPMOVDW256 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VSQRTPSMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) - // result: (VSUBPSMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSHUFD256 [a] x) mask) + // result: (VPSHUFDMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSUBPS512 { + if v_0.Op != OpAMD64VPSHUFD256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VSUBPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFDMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPBLENDMDMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked256 (VPSLLD256const [a] x) mask) + // result: (VPSLLDMasked256const [a] x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VPSLLD256const { break } - v.reset(OpAMD64VPBLENDMDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { - v_2 := v.Args[2] - 
v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask) - // result: (VADDPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) + // result: (VPSRLDMasked256const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VADDPD512 { + if v_0.Op != OpAMD64VPSRLD256const { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VADDPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTPD2PS256 x) mask) - // result: (VCVTPD2PSMasked256Merging dst x mask) + // match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask) + // result: (VPSRADMasked256const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTPD2PS256 { + if v_0.Op != OpAMD64VPSRAD256const { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTPD2PSMasked256Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTQQ2PD512 x) mask) - // result: (VCVTQQ2PDMasked512Merging dst x mask) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU32Masked512 (VPABSD512 x) mask) + // result: (VPABSDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTQQ2PD512 { + if v_0.Op != OpAMD64VPABSD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTQQ2PDMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTQQ2PS256 x) mask) - // result: (VCVTQQ2PSMasked256Merging dst x mask) + // match: 
(VMOVDQU32Masked512 (VADDPS512 x y) mask) + // result: (VADDPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTQQ2PS256 { + if v_0.Op != OpAMD64VADDPS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTQQ2PSMasked256Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VADDPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTTPD2DQ256 x) mask) - // result: (VCVTTPD2DQMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPADDD512 x y) mask) + // result: (VPADDDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2DQ256 { + if v_0.Op != OpAMD64VPADDD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTTPD2DQMasked256Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTTPD2QQ512 x) mask) - // result: (VCVTTPD2QQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPANDD512 x y) mask) + // result: (VPANDDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2QQ512 { + if v_0.Op != OpAMD64VPANDD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTTPD2QQMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTTPD2UDQ256 x) mask) - // result: (VCVTTPD2UDQMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPANDND512 x y) mask) + // result: (VPANDNDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2UDQ256 { + if v_0.Op != OpAMD64VPANDND512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTTPD2UDQMasked256Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDNDMasked512) + v.AddArg3(x, y, 
mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTTPD2UQQ512 x) mask) - // result: (VCVTTPD2UQQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) + // result: (VBROADCASTSSMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2UQQ512 { + if v_0.Op != OpAMD64VBROADCASTSS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTTPD2UQQMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSSMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTUQQ2PD512 x) mask) - // result: (VCVTUQQ2PDMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) + // result: (VPBROADCASTDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUQQ2PD512 { + if v_0.Op != OpAMD64VPBROADCASTD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTUQQ2PDMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VCVTUQQ2PS256 x) mask) - // result: (VCVTUQQ2PSMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) + // result: (VRNDSCALEPSMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUQQ2PS256 { + if v_0.Op != OpAMD64VRNDSCALEPS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VCVTUQQ2PSMasked256Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) - // result: (VDIVPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) + // result: (VREDUCEPSMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VDIVPD512 { + if v_0.Op != OpAMD64VREDUCEPS512 { break } - 
y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VDIVPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) - // result: (VMAXPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPERMI2PS512 x y z) mask) + // result: (VPERMI2PSMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMAXPD512 { + if v_0.Op != OpAMD64VPERMI2PS512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VMAXPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PSMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask) - // result: (VMINPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) + // result: (VPERMI2DMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMINPD512 { + if v_0.Op != OpAMD64VPERMI2D512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VMINPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2DMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask) - // result: (VMULPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTDQ2PS512 x) mask) + // result: (VCVTDQ2PSMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMULPD512 { + if v_0.Op != OpAMD64VCVTDQ2PS512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VMULPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTDQ2PSMasked512) + v.AddArg2(x, mask) return true } - // match: 
(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) - // result: (VPABSQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VCVTUDQ2PS512 x) mask) + // result: (VCVTUDQ2PSMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSQ512 { + if v_0.Op != OpAMD64VCVTUDQ2PS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPABSQMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUDQ2PSMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) - // result: (VPADDQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTPS2PD512 x) mask) + // result: (VCVTPS2PDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDQ512 { + if v_0.Op != OpAMD64VCVTPS2PD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTPS2PDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) - // result: (VPANDQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTDQ2PD512 x) mask) + // result: (VCVTDQ2PDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPANDQ512 { + if v_0.Op != OpAMD64VCVTDQ2PD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPANDQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTDQ2PDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) - // result: (VPLZCNTQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VCVTUDQ2PD512 x) mask) + // result: (VCVTUDQ2PDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ512 { + if v_0.Op != OpAMD64VCVTUDQ2PD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPLZCNTQMasked512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 
+ v.reset(OpAMD64VCVTUDQ2PDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) - // result: (VPMAXSQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) + // result: (VCVTTPS2DQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ512 { + if v_0.Op != OpAMD64VCVTTPS2DQ512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXSQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2DQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) - // result: (VPMAXUQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTTPS2QQ512 x) mask) + // result: (VCVTTPS2QQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ512 { + if v_0.Op != OpAMD64VCVTTPS2QQ512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2QQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) - // result: (VPMINSQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTTPS2UDQ512 x) mask) + // result: (VCVTTPS2UDQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSQ512 { + if v_0.Op != OpAMD64VCVTTPS2UDQ512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINSQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2UDQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) - // result: (VPMINUQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VCVTTPS2UQQ512 x) mask) + // result: (VCVTTPS2UQQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUQ512 { + if v_0.Op != OpAMD64VCVTTPS2UQQ512 { 
break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINUQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2UQQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) - // result: (VPMOVQBMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask) + // result: (VDIVPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQB128_512 { + if v_0.Op != OpAMD64VDIVPS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVQBMasked128_512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) - // result: (VPMOVQDMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) + // result: (VPMOVSXDQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQD256 { + if v_0.Op != OpAMD64VPMOVSXDQ512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVQDMasked256Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) - // result: (VPMOVQWMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) + // result: (VPMOVZXDQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQW128_512 { + if v_0.Op != OpAMD64VPMOVZXDQ512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVQWMasked128_512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) - // result: (VPMOVSQBMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask) + // result: 
(VPLZCNTDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQB128_512 { + if v_0.Op != OpAMD64VPLZCNTD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVSQBMasked128_512Merging) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) - // result: (VPMOVSQDMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VMAXPS512 x y) mask) + // result: (VMAXPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD256 { + if v_0.Op != OpAMD64VMAXPS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVSQDMasked256Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) - // result: (VPMOVSQWMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPMAXSD512 x y) mask) + // result: (VPMAXSDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQW128_512 { + if v_0.Op != OpAMD64VPMAXSD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVSQWMasked128_512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) - // result: (VPMOVUSQBMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPMAXUD512 x y) mask) + // result: (VPMAXUDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128_512 { + if v_0.Op != OpAMD64VPMAXUD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSQBMasked128_512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst 
(VPMOVUSQD256 x) mask) - // result: (VPMOVUSQDMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VMINPS512 x y) mask) + // result: (VMINPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQD256 { + if v_0.Op != OpAMD64VMINPS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSQDMasked256Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) - // result: (VPMOVUSQWMasked128_512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPMINSD512 x y) mask) + // result: (VPMINSDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQW128_512 { + if v_0.Op != OpAMD64VPMINSD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSQWMasked128_512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) - // result: (VPMULLQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPMINUD512 x y) mask) + // result: (VPMINUDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLQ512 { + if v_0.Op != OpAMD64VPMINUD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMULLQMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) - // result: (VPOPCNTQMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VFMADD213PS512 x y z) mask) + // result: (VFMADD213PSMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ512 { + if v_0.Op != OpAMD64VFMADD213PS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPOPCNTQMasked512Merging) - 
v.AddArg3(dst, x, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PSMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask) - // result: (VPORQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VFMADDSUB213PS512 x y z) mask) + // result: (VFMADDSUB213PSMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPORQ512 { + if v_0.Op != OpAMD64VFMADDSUB213PS512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPORQMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PSMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) - // result: (VPROLQMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VMULPS512 x y) mask) + // result: (VMULPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLQ512 { + if v_0.Op != OpAMD64VMULPS512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPROLQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) - // result: (VPROLVQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPMULLD512 x y) mask) + // result: (VPMULLDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLVQ512 { + if v_0.Op != OpAMD64VPMULLD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPROLVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) - // result: 
(VPRORQMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VFMSUBADD213PS512 x y z) mask) + // result: (VFMSUBADD213PSMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORQ512 { + if v_0.Op != OpAMD64VFMSUBADD213PS512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPRORQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PSMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) - // result: (VPRORVQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPOPCNTD512 x) mask) + // result: (VPOPCNTDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORVQ512 { + if v_0.Op != OpAMD64VPOPCNTD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPRORVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) - // result: (VPSHLDQMasked512Merging dst [a] x y mask) + // match: (VMOVDQU32Masked512 (VPORD512 x y) mask) + // result: (VPORDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ512 { + if v_0.Op != OpAMD64VPORD512 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPORDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) - // result: (VPSHRDQMasked512Merging dst [a] x y mask) + // match: (VMOVDQU32Masked512 (VPERMPS512 x y) mask) + // result: (VPERMPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ512 { + if v_0.Op 
!= OpAMD64VPERMPS512 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) - // result: (VPSLLQMasked512constMerging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VPERMD512 x y) mask) + // result: (VPERMDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLQ512const { + if v_0.Op != OpAMD64VPERMD512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) - // result: (VPSLLVQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VRCP14PS512 x) mask) + // result: (VRCP14PSMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ512 { + if v_0.Op != OpAMD64VRCP14PS512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRCP14PSMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) - // result: (VPSRAQMasked512constMerging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VRSQRT14PS512 x) mask) + // result: (VRSQRT14PSMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAQ512const { + if v_0.Op != OpAMD64VRSQRT14PS512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRAQMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask 
:= v_1 + v.reset(OpAMD64VRSQRT14PSMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) - // result: (VPSRAVQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPROLD512 [a] x) mask) + // result: (VPROLDMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ512 { + if v_0.Op != OpAMD64VPROLD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) - // result: (VPSRLQMasked512constMerging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VPRORD512 [a] x) mask) + // result: (VPRORDMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLQ512const { + if v_0.Op != OpAMD64VPRORD512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRLQMasked512constMerging) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) - // result: (VPSRLVQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPROLVD512 x y) mask) + // result: (VPROLVDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ512 { + if v_0.Op != OpAMD64VPROLVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) - // result: (VPSUBQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPRORVD512 
x y) mask) + // result: (VPRORVDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBQ512 { + if v_0.Op != OpAMD64VPRORVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBQMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) - // result: (VPXORQMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPXORQ512 { + if v_0.Op != OpAMD64VPMOVSDB128_512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPXORQMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) - // result: (VRCP14PDMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) + // result: (VPACKSSDWMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRCP14PD512 { + if v_0.Op != OpAMD64VPACKSSDW512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VRCP14PDMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKSSDWMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) - // result: (VREDUCEPDMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD512 { + if v_0.Op != OpAMD64VPMOVUSDB128_512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VREDUCEPDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := 
v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) - // result: (VRNDSCALEPDMasked512Merging dst [a] x mask) + // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) + // result: (VPACKUSDWMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD512 { + if v_0.Op != OpAMD64VPACKUSDW512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VRNDSCALEPDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) - // result: (VRSQRT14PDMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VSCALEFPS512 x y) mask) + // result: (VSCALEFPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PD512 { + if v_0.Op != OpAMD64VSCALEFPS512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VRSQRT14PDMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) - // result: (VSCALEFPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSHLDD512 [a] x y) mask) + // result: (VPSHLDDMasked512 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD512 { + if v_0.Op != OpAMD64VPSHLDD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) - // result: (VSQRTPDMasked512Merging dst x mask) + // match: 
(VMOVDQU32Masked512 (VPSLLD512 x y) mask) + // result: (VPSLLDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSQRTPD512 { + if v_0.Op != OpAMD64VPSLLD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VSQRTPDMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) - // result: (VSUBPDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSHRDD512 [a] x y) mask) + // result: (VPSHRDDMasked512 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSUBPD512 { + if v_0.Op != OpAMD64VPSHRDD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VSUBPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPBLENDMQMasked512load {sym} [off] x ptr mask mem) + // match: (VMOVDQU32Masked512 (VPSRAD512 x y) mask) + // result: (VPSRADMasked512 x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if v_0.Op != OpAMD64VPSRAD512 { break } - v.reset(OpAMD64VPBLENDMQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked512) + v.AddArg3(x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(VPBLENDMWMasked512 dst (VPABSW512 x) mask) - // result: (VPABSWMasked512Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPSRLD512 x y) mask) + // result: (VPSRLDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSW512 { + if v_0.Op != OpAMD64VPSRLD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPABSWMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) - // result: (VPADDSWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSHLDVD512 x y z) mask) + // result: (VPSHLDVDMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDSW512 { + if v_0.Op != OpAMD64VPSHLDVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVDMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) - // result: (VPADDUSWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSLLVD512 x y) mask) + // result: (VPSLLVDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDUSW512 { + if v_0.Op != OpAMD64VPSLLVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPADDUSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask) - // result: (VPADDWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSHRDVD512 x y z) mask) + // result: (VPSHRDVDMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDW512 { + if v_0.Op != OpAMD64VPSHRDVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := 
v_2 - v.reset(OpAMD64VPADDWMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVDMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) - // result: (VPAVGWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSRAVD512 x y) mask) + // result: (VPSRAVDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPAVGW512 { + if v_0.Op != OpAMD64VPSRAVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPAVGWMasked512Merging) - v.AddArg4(dst, x, y, mask) - return true - } - // match: (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) - // result: (VPMADDUBSWMasked512Merging dst x y mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRLVD512 x y) mask) + // result: (VPSRLVDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMADDUBSW512 { + if v_0.Op != OpAMD64VPSRLVD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) - // result: (VPMADDWDMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VSQRTPS512 x) mask) + // result: (VSQRTPSMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMADDWD512 { + if v_0.Op != OpAMD64VSQRTPS512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) - // result: (VPMAXSWMasked512Merging dst x y mask) + 
// match: (VMOVDQU32Masked512 (VSUBPS512 x y) mask) + // result: (VSUBPSMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSW512 { + if v_0.Op != OpAMD64VSUBPS512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPSMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) - // result: (VPMAXUWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSUBD512 x y) mask) + // result: (VPSUBDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUW512 { + if v_0.Op != OpAMD64VPSUBD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMAXUWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) - // result: (VPMINSWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSW512 { + if v_0.Op != OpAMD64VPMOVDB128_512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) - // result: (VPMINUWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPXORD512 x y) mask) + // result: (VPXORDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUW512 { + if v_0.Op != OpAMD64VPXORD512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINUWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := 
v_1 + v.reset(OpAMD64VPXORDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) - // result: (VPMOVSWBMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) + // result: (VPSHUFDMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSWB256 { + if v_0.Op != OpAMD64VPSHUFD512 { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVSWBMasked256Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) - // result: (VPMOVUSWBMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPSLLD512const [a] x) mask) + // result: (VPSLLDMasked512const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB256 { + if v_0.Op != OpAMD64VPSLLD512const { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSWBMasked256Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) - // result: (VPMOVWBMasked256Merging dst x mask) + // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVWB256 { + if v_0.Op != OpAMD64VPSRLD512const { break } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVWBMasked256Merging) - v.AddArg3(dst, x, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) - // result: (VPMULHUWMasked512Merging dst x y mask) + // match: (VMOVDQU32Masked512 (VPSRAD512const [a] 
x) mask) + // result: (VPSRADMasked512const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULHUW512 { + if v_0.Op != OpAMD64VPSRAD512const { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked512Merging) - v.AddArg4(dst, x, y, mask) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) - // result: (VPMULHWMasked512Merging dst x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU64Masked128 (VPABSQ128 x) mask) + // result: (VPABSQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULHW512 { + if v_0.Op != OpAMD64VPABSQ128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMULHWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) - // result: (VPMULLWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VADDPD128 x y) mask) + // result: (VADDPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLW512 { + if v_0.Op != OpAMD64VADDPD128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMULLWMasked512Merging) - v.AddArg4(dst, x, y, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VADDPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) - // result: (VPOPCNTWMasked512Merging dst x mask) + // match: (VMOVDQU64Masked128 (VPADDQ128 x y) mask) + // result: (VPADDQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW512 { + if v_0.Op != OpAMD64VPADDQ128 { break } - x := v_1.Args[0] - mask := v_2 - 
v.reset(OpAMD64VPOPCNTWMasked512Merging) - v.AddArg3(dst, x, mask) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) - // result: (VPSHLDWMasked512Merging dst [a] x y mask) + // match: (VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) + // result: (VPBROADCASTQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHLDW512 { + if v_0.Op != OpAMD64VPBROADCASTQ128 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) - // result: (VPSHRDWMasked512Merging dst [a] x y mask) + // match: (VMOVDQU64Masked128 (VRNDSCALEPD128 [a] x) mask) + // result: (VRNDSCALEPDMasked128 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHRDW512 { + if v_0.Op != OpAMD64VRNDSCALEPD128 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512Merging) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) - // result: (VPSHUFHWMasked512Merging dst [a] x mask) + // match: (VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) + // result: (VREDUCEPDMasked128 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW512 { + if v_0.Op != OpAMD64VREDUCEPD128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHUFHWMasked512Merging) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) 
v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHUFLW512 [a] x) mask) - // result: (VPSHUFLWMasked512Merging dst [a] x mask) + // match: (VMOVDQU64Masked128 (VPERMI2PD128 x y z) mask) + // result: (VPERMI2PDMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFLW512 { + if v_0.Op != OpAMD64VPERMI2PD128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSHUFLWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PDMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) - // result: (VPSLLVWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VPERMI2Q128 x y z) mask) + // result: (VPERMI2QMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLVW512 { + if v_0.Op != OpAMD64VPERMI2Q128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512Merging) - v.AddArg4(dst, x, y, mask) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2QMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) - // result: (VPSLLWMasked512constMerging dst [a] x mask) + // match: (VMOVDQU64Masked128 (VCVTPD2PSX128 x) mask) + // result: (VCVTPD2PSXMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLW512const { + if v_0.Op != OpAMD64VCVTPD2PSX128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTPD2PSXMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) - // result: 
(VPSRAVWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VCVTPD2PSY128 x) mask) + // result: (VCVTPD2PSYMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAVW512 { + if v_0.Op != OpAMD64VCVTPD2PSY128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTPD2PSYMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) - // result: (VPSRAWMasked512constMerging dst [a] x mask) + // match: (VMOVDQU64Masked128 (VCVTQQ2PSX128 x) mask) + // result: (VCVTQQ2PSXMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAW512const { + if v_0.Op != OpAMD64VCVTQQ2PSX128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRAWMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTQQ2PSXMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) - // result: (VPSRLVWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VCVTQQ2PSY128 x) mask) + // result: (VCVTQQ2PSYMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLVW512 { + if v_0.Op != OpAMD64VCVTQQ2PSY128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTQQ2PSYMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) - // result: (VPSRLWMasked512constMerging dst [a] x mask) + // match: (VMOVDQU64Masked128 (VCVTUQQ2PSX128 x) mask) + // result: (VCVTUQQ2PSXMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLW512const { + if v_0.Op != OpAMD64VCVTUQQ2PSX128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - 
v.reset(OpAMD64VPSRLWMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUQQ2PSXMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) - // result: (VPSUBSWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VCVTUQQ2PSY128 x) mask) + // result: (VCVTUQQ2PSYMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBSW512 { + if v_0.Op != OpAMD64VCVTUQQ2PSY128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUQQ2PSYMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) - // result: (VPSUBUSWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VCVTQQ2PD128 x) mask) + // result: (VCVTQQ2PDMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW512 { + if v_0.Op != OpAMD64VCVTQQ2PD128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTQQ2PDMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) - // result: (VPSUBWMasked512Merging dst x y mask) + // match: (VMOVDQU64Masked128 (VCVTUQQ2PD128 x) mask) + // result: (VCVTUQQ2PDMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBW512 { + if v_0.Op != OpAMD64VCVTUQQ2PD128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBWMasked512Merging) - v.AddArg4(dst, x, y, mask) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUQQ2PDMasked128) + v.AddArg2(x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (VPBLENDVB128 
dst (VADDPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VCVTTPD2DQX128 x) mask) + // result: (VCVTTPD2DQXMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VADDPD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2DQX128 { break } - v.reset(OpAMD64VADDPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2DQXMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VADDPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VCVTTPD2DQY128 x) mask) + // result: (VCVTTPD2DQYMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VADDPS128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2DQY128 { break } - v.reset(OpAMD64VADDPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2DQYMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VCVTTPD2QQ128 x) mask) + // result: (VCVTTPD2QQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSD256 { + if v_0.Op != OpAMD64VCVTTPD2QQ128 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - 
v.reset(OpAMD64VBROADCASTSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2QQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VCVTTPD2UDQX128 x) mask) + // result: (VCVTTPD2UDQXMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSD512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2UDQX128 { break } - v.reset(OpAMD64VBROADCASTSDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2UDQXMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VCVTTPD2UDQY128 x) mask) + // result: (VCVTTPD2UDQYMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSS128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2UDQY128 { break } - v.reset(OpAMD64VBROADCASTSSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2UDQYMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VCVTTPD2UQQ128 x) 
mask) + // result: (VCVTTPD2UQQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSS256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2UQQ128 { break } - v.reset(OpAMD64VBROADCASTSSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2UQQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VDIVPD128 x y) mask) + // result: (VDIVPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSS512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VDIVPD128 { break } - v.reset(OpAMD64VBROADCASTSSMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTDQ2PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTDQ2PDMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPLZCNTQ128 x) mask) + // result: (VPLZCNTQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTDQ2PD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPLZCNTQ128 { break } - v.reset(OpAMD64VCVTDQ2PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTQMasked128) + v.AddArg2(x, mask) return true } - // 
match: (VPBLENDVB128 dst (VCVTDQ2PS128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTDQ2PSMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VMAXPD128 x y) mask) + // result: (VMAXPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTDQ2PS128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VMAXPD128 { break } - v.reset(OpAMD64VCVTDQ2PSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTPD2PSX128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPD2PSXMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMAXSQ128 x y) mask) + // result: (VPMAXSQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTPD2PSX128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMAXSQ128 { break } - v.reset(OpAMD64VCVTPD2PSXMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTPS2PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPS2PDMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPMAXUQ128 x y) mask) + // result: (VPMAXUQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTPS2PD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMAXUQ128 { break } - v.reset(OpAMD64VCVTPS2PDMasked256Merging) 
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTQQ2PD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTQQ2PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VMINPD128 x y) mask) + // result: (VMINPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTQQ2PD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VMINPD128 { break } - v.reset(OpAMD64VCVTQQ2PDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTQQ2PSX128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTQQ2PSXMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMINSQ128 x y) mask) + // result: (VPMINSQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTQQ2PSX128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMINSQ128 { break } - v.reset(OpAMD64VCVTQQ2PSXMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPD2DQX128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2DQXMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMINUQ128 x y) mask) + // result: (VPMINUQMasked128 x y 
mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2DQX128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMINUQ128 { break } - v.reset(OpAMD64VCVTTPD2DQXMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPD2QQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2QQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VFMADD213PD128 x y z) mask) + // result: (VFMADD213PDMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2QQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VFMADD213PD128 { break } - v.reset(OpAMD64VCVTTPD2QQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PDMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPD2UDQX128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2UDQXMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VFMADDSUB213PD128 x y z) mask) + // result: (VFMADDSUB213PDMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2UDQX128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VFMADDSUB213PD128 { break } - v.reset(OpAMD64VCVTTPD2UDQXMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask 
:= v_1 + v.reset(OpAMD64VFMADDSUB213PDMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPD2UQQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2UQQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VMULPD128 x y) mask) + // result: (VMULPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2UQQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VMULPD128 { break } - v.reset(OpAMD64VCVTTPD2UQQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPMULLQ128 x y) mask) + // result: (VPMULLQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMULLQ128 { break } - v.reset(OpAMD64VCVTTPS2DQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPS2QQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2QQMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VFMSUBADD213PD128 x y z) mask) + // result: (VFMSUBADD213PDMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2QQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VFMSUBADD213PD128 { break } - v.reset(OpAMD64VCVTTPS2QQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PDMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPS2UDQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPOPCNTQ128 x) mask) + // result: (VPOPCNTQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2UDQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPOPCNTQ128 { break } - v.reset(OpAMD64VCVTTPS2UDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VCVTTPS2UQQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2UQQMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VRCP14PD128 x) mask) + // result: (VRCP14PDMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2UQQ256 { + if v_0.Op != OpAMD64VRCP14PD128 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRCP14PDMasked128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VRSQRT14PD128 x) mask) + // result: (VRSQRT14PDMasked128 x mask) + for { + if v_0.Op != OpAMD64VRSQRT14PD128 { break } - v.reset(OpAMD64VCVTTPS2UQQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRSQRT14PDMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VCVTUDQ2PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUDQ2PDMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPROLQ128 [a] x) mask) + // result: (VPROLQMasked128 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUDQ2PD256 { + if v_0.Op != OpAMD64VPROLQ128 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPRORQ128 [a] x) mask) + // result: (VPRORQMasked128 [a] x mask) + for { + if v_0.Op != OpAMD64VPRORQ128 { break } - v.reset(OpAMD64VCVTUDQ2PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VCVTUDQ2PS128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUDQ2PSMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPROLVQ128 x y) mask) + // result: (VPROLVQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUDQ2PS128 { + if v_0.Op != OpAMD64VPROLVQ128 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVQMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPRORVQ128 x y) mask) + // result: (VPRORVQMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPRORVQ128 { break } - 
v.reset(OpAMD64VCVTUDQ2PSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VCVTUQQ2PD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUQQ2PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) + // result: (VPMOVSQBMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUQQ2PD128 { + if v_0.Op != OpAMD64VPMOVSQB128_128 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_128) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) + // result: (VPMOVSQWMasked128_128 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_128 { break } - v.reset(OpAMD64VCVTUQQ2PDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VCVTUQQ2PSX128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUQQ2PSXMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) + // result: (VPMOVSQDMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUQQ2PSX128 { + if v_0.Op != OpAMD64VPMOVSQD128_128 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VCVTUQQ2PSXMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked128_128) + 
v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) + // result: (VPMOVUSQBMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VDIVPD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVUSQB128_128 { break } - v.reset(OpAMD64VDIVPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) + // result: (VPMOVUSQWMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VDIVPS128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVUSQW128_128 { break } - v.reset(OpAMD64VDIVPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) + // result: (VPMOVUSQDMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VGF2P8MULB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVUSQD128_128 { break } - v.reset(OpAMD64VGF2P8MULBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VSCALEFPD128 x y) mask) + // result: (VSCALEFPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMAXPD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSCALEFPD128 { break } - v.reset(OpAMD64VMAXPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPSHLDQ128 [a] x y) mask) + // result: (VPSHLDQMasked128 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMAXPS128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHLDQ128 { break } - v.reset(OpAMD64VMAXPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst 
(VMINPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPSLLQ128 x y) mask) + // result: (VPSLLQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMINPD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSLLQ128 { break } - v.reset(OpAMD64VMINPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VMINPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPSHRDQ128 [a] x y) mask) + // result: (VPSHRDQMasked128 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMINPS128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHRDQ128 { break } - v.reset(OpAMD64VMINPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VMULPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPSRAQ128 x y) mask) + // result: (VPSRAQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMULPD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if 
v_0.Op != OpAMD64VPSRAQ128 { break } - v.reset(OpAMD64VMULPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VMULPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPSRLQ128 x y) mask) + // result: (VPSRLQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMULPS128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRLQ128 { break } - v.reset(OpAMD64VMULPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPABSB128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked128 (VPSHLDVQ128 x y z) mask) + // result: (VPSHLDVQMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSB128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHLDVQ128 { break } - v.reset(OpAMD64VPABSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVQMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPABSD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM 
mask)) + // match: (VMOVDQU64Masked128 (VPSLLVQ128 x y) mask) + // result: (VPSLLVQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSLLVQ128 { break } - v.reset(OpAMD64VPABSDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPABSQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPSHRDVQ128 x y z) mask) + // result: (VPSHRDVQMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHRDVQ128 { break } - v.reset(OpAMD64VPABSQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVQMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPABSW128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked128 (VPSRAVQ128 x y) mask) + // result: (VPSRAVQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSW128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRAVQ128 { break } - v.reset(OpAMD64VPABSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPSRAVQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPSRLVQ128 x y) mask) + // result: (VPSRLVQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRLVQ128 { break } - v.reset(OpAMD64VPACKSSDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VSQRTPD128 x) mask) + // result: (VSQRTPDMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSQRTPD128 { break } - v.reset(OpAMD64VPACKUSDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPADDB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked128 (VSUBPD128 x y) mask) + // result: (VSUBPDMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSUBPD128 { break } - v.reset(OpAMD64VPADDBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPDMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPADDD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked128 (VPSUBQ128 x y) mask) + // result: (VPSUBQMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBQ128 { break } - v.reset(OpAMD64VPADDDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBQMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) + // result: (VPMOVQBMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQB128_128 { break } - v.reset(OpAMD64VPADDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) + // result: (VPMOVQWMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDSB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQW128_128 { break } - v.reset(OpAMD64VPADDSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) + // result: (VPMOVQDMasked128_128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDSW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQD128_128 { break } - v.reset(OpAMD64VPADDSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked128_128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) + // result: (VPSLLQMasked128const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDUSB128 { + if v_0.Op != OpAMD64VPSLLQ128const { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = 
uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) + // result: (VPSRLQMasked128const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ128const { break } - v.reset(OpAMD64VPADDUSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) + // result: (VPSRAQMasked128const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDUSW128 { + if v_0.Op != OpAMD64VPSRAQ128const { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPADDUSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked128const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPADDW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU64Masked256 (VPABSQ256 x) mask) + // result: (VPABSQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPABSQ256 { break } - 
v.reset(OpAMD64VPADDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPALIGNR128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPALIGNRMasked128Merging dst [a] x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VADDPD256 x y) mask) + // result: (VADDPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPALIGNR128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VADDPD256 { break } - v.reset(OpAMD64VPALIGNRMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VADDPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPADDQ256 x y) mask) + // result: (VPADDQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPAVGB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDQ256 { break } - v.reset(OpAMD64VPAVGBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGWMasked128Merging dst x y 
(VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) + // result: (VBROADCASTSDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPAVGW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VBROADCASTSD256 { break } - v.reset(OpAMD64VPAVGWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) + // result: (VPBROADCASTQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTB128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPBROADCASTQ256 { break } - v.reset(OpAMD64VPBROADCASTBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VRNDSCALEPD256 [a] x) mask) + // result: (VRNDSCALEPDMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTB256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VRNDSCALEPD256 { break } - v.reset(OpAMD64VPBROADCASTBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, 
x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) + // result: (VREDUCEPDMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTB512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VREDUCEPD256 { break } - v.reset(OpAMD64VPBROADCASTBMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPERMI2PD256 x y z) mask) + // result: (VPERMI2PDMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMI2PD256 { break } - v.reset(OpAMD64VPBROADCASTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PDMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 
(VPERMI2Q256 x y z) mask) + // result: (VPERMI2QMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMI2Q256 { break } - v.reset(OpAMD64VPBROADCASTDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2QMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTPD2PS256 x) mask) + // result: (VCVTPD2PSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTPD2PS256 { break } - v.reset(OpAMD64VPBROADCASTDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTPD2PSMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTQQ2PS256 x) mask) + // result: (VCVTQQ2PSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTQQ2PS256 { break } - v.reset(OpAMD64VPBROADCASTQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VCVTQQ2PSMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTUQQ2PS256 x) mask) + // result: (VCVTUQQ2PSMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTUQQ2PS256 { break } - v.reset(OpAMD64VPBROADCASTQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUQQ2PSMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTQQ2PD256 x) mask) + // result: (VCVTQQ2PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTQQ2PD256 { break } - v.reset(OpAMD64VPBROADCASTQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTQQ2PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTUQQ2PD256 x) mask) + // result: (VCVTUQQ2PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if 
v_0.Op != OpAMD64VCVTUQQ2PD256 { break } - v.reset(OpAMD64VPBROADCASTWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUQQ2PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTTPD2DQ256 x) mask) + // result: (VCVTTPD2DQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2DQ256 { break } - v.reset(OpAMD64VPBROADCASTWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2DQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTTPD2QQ256 x) mask) + // result: (VCVTTPD2QQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2QQ256 { break } - v.reset(OpAMD64VPBROADCASTWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2QQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 
(VCVTTPD2UDQ256 x) mask) + // result: (VCVTTPD2UDQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2UDQ256 { break } - v.reset(OpAMD64VPLZCNTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2UDQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VCVTTPD2UQQ256 x) mask) + // result: (VCVTTPD2UQQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VCVTTPD2UQQ256 { break } - v.reset(OpAMD64VPLZCNTQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2UQQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VDIVPD256 x y) mask) + // result: (VDIVPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMADDUBSW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VDIVPD256 { break } - v.reset(OpAMD64VPMADDUBSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPDMasked256) + 
v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VPLZCNTQ256 x) mask) + // result: (VPLZCNTQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMADDWD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPLZCNTQ256 { break } - v.reset(OpAMD64VPMADDWDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VMAXPD256 x y) mask) + // result: (VMAXPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VMAXPD256 { break } - v.reset(OpAMD64VPMAXSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPMAXSQ256 x y) mask) + // result: (VPMAXSQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != 
OpAMD64VPMAXSQ256 { break } - v.reset(OpAMD64VPMAXSDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPMAXUQ256 x y) mask) + // result: (VPMAXUQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ128 { + if v_0.Op != OpAMD64VPMAXUQ256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMAXSQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VMINPD256 x y) mask) + // result: (VMINPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VMINPD256 { break } - v.reset(OpAMD64VPMAXSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM 
mask)) + // match: (VMOVDQU64Masked256 (VPMINSQ256 x y) mask) + // result: (VPMINSQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMINSQ256 { break } - v.reset(OpAMD64VPMAXUBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPMINUQ256 x y) mask) + // result: (VPMINUQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMINUQ256 { break } - v.reset(OpAMD64VPMAXUDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VFMADD213PD256 x y z) mask) + // result: (VFMADD213PDMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VFMADD213PD256 { break } - v.reset(OpAMD64VPMAXUQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg4(dst, x, y, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PDMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VFMADDSUB213PD256 x y z) mask) + // result: (VFMADDSUB213PDMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VFMADDSUB213PD256 { break } - v.reset(OpAMD64VPMAXUWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PDMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMINSB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VMULPD256 x y) mask) + // result: (VMULPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VMULPD256 { break } - v.reset(OpAMD64VPMINSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMINSD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPMULLQ256 x y) mask) + // result: 
(VPMULLQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMULLQ256 { break } - v.reset(OpAMD64VPMINSDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VFMSUBADD213PD256 x y z) mask) + // result: (VFMSUBADD213PDMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VFMSUBADD213PD256 { break } - v.reset(OpAMD64VPMINSQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PDMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMINSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VPOPCNTQ256 x) mask) + // result: (VPOPCNTQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPOPCNTQ256 { break } - v.reset(OpAMD64VPMINSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + 
mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMINUB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPERMPD256 x y) mask) + // result: (VPERMPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMPD256 { break } - v.reset(OpAMD64VPMINUBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPERMQ256 x y) mask) + // result: (VPERMQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMQ256 { break } - v.reset(OpAMD64VPMINUDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VRCP14PD256 x) mask) + // result: (VRCP14PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VRCP14PD256 { break } - v.reset(OpAMD64VPMINUQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRCP14PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMINUW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VRSQRT14PD256 x) mask) + // result: (VRSQRT14PDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VRSQRT14PD256 { break } - v.reset(OpAMD64VPMINUWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRSQRT14PDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVDB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPROLQ256 [a] x) mask) + // result: (VPROLQMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVDB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPROLQ256 { break } - v.reset(OpAMD64VPMOVDBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVDW128_128 x) mask) - // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPRORQ256 [a] x) mask) + // result: (VPRORQMasked256 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVDW128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPRORQ256 { break } - v.reset(OpAMD64VPMOVDWMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVQB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPROLVQ256 x y) mask) + // result: (VPROLVQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPROLVQ256 { break } - v.reset(OpAMD64VPMOVQBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVQD128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPRORVQ256 x y) mask) + // result: (VPRORVQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQD128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPRORVQ256 { break } - 
v.reset(OpAMD64VPMOVQDMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVQW128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) + // result: (VPMOVSQBMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQW128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSQB128_256 { break } - v.reset(OpAMD64VPMOVQWMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSDB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) + // result: (VPMOVSQWMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSDB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSQW128_256 { break } - v.reset(OpAMD64VPMOVSDBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSDW128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 
(VPMOVSQD128_256 x) mask) + // result: (VPMOVSQDMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSDW128_128 { + if v_0.Op != OpAMD64VPMOVSQD128_256 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) + // result: (VPMOVSQDMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQD256 { break } - v.reset(OpAMD64VPMOVSDWMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSQB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) + // result: (VPMOVUSQBMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQB128_128 { + if v_0.Op != OpAMD64VPMOVUSQB128_256 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) + // result: (VPMOVUSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_256 { break } - v.reset(OpAMD64VPMOVSQBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSQD128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: 
(VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) + // result: (VPMOVUSQDMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD128_128 { + if v_0.Op != OpAMD64VPMOVUSQD128_256 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVSQDMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSQW128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) + // result: (VPMOVUSQDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQW128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVUSQD256 { break } - v.reset(OpAMD64VPMOVSQWMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSWB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VSCALEFPD256 x y) mask) + // result: (VSCALEFPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSWB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSCALEFPD256 { break } - v.reset(OpAMD64VPMOVSWBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VSCALEFPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSHLDQ256 [a] x y) mask) + // result: (VPSHLDQMasked256 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHLDQ256 { break } - v.reset(OpAMD64VPMOVSXBDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSLLQ256 x y) mask) + // result: (VPSLLQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSLLQ256 { break } - v.reset(OpAMD64VPMOVSXBDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSHRDQ256 [a] x y) mask) + // result: (VPSHRDQMasked256 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD512 { - break - } - x := v_1.Args[0] 
- mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHRDQ256 { break } - v.reset(OpAMD64VPMOVSXBDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSRAQ256 x y) mask) + // result: (VPSRAQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRAQ256 { break } - v.reset(OpAMD64VPMOVSXBQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSRLQ256 x y) mask) + // result: (VPSRLQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRLQ256 { break } - v.reset(OpAMD64VPMOVSXBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) - // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSHLDVQ256 x y z) mask) + // result: (VPSHLDVQMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHLDVQ256 { break } - v.reset(OpAMD64VPMOVSXBQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVQMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSLLVQ256 x y) mask) + // result: (VPSLLVQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSLLVQ256 { break } - v.reset(OpAMD64VPMOVSXBWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked256 (VPSHRDVQ256 x y z) mask) + // result: (VPSHRDVQMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHRDVQ256 { break } - v.reset(OpAMD64VPMOVSXBWMasked256Merging) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVQMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPSRAVQ256 x y) mask) + // result: (VPSRAVQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXDQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRAVQ256 { break } - v.reset(OpAMD64VPMOVSXDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPSRLVQ256 x y) mask) + // result: (VPSRLVQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXDQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRLVQ256 { break } - v.reset(OpAMD64VPMOVSXDQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VSQRTPD256 x) mask) + // result: 
(VSQRTPDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSQRTPD256 { break } - v.reset(OpAMD64VPMOVSXWDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VSUBPD256 x y) mask) + // result: (VSUBPDMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSUBPD256 { break } - v.reset(OpAMD64VPMOVSXWDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPDMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VPSUBQ256 x y) mask) + // result: (VPSUBQMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBQ256 { break } - v.reset(OpAMD64VPMOVSXWQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBQMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst 
(VPMOVSXWQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) + // result: (VPMOVQBMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQB128_256 { break } - v.reset(OpAMD64VPMOVSXWQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) + // result: (VPMOVQWMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQW128_256 { break } - v.reset(OpAMD64VPMOVSXWQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) + // result: (VPMOVQDMasked128_256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQD128_256 { break } - v.reset(OpAMD64VPMOVUSDBMasked128_128Merging) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked128_256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked256 (VPMOVQD256 x) mask) + // result: (VPMOVQDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDW128_128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVQD256 { break } - v.reset(OpAMD64VPMOVUSDWMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) + // result: (VPSLLQMasked256const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128_128 { + if v_0.Op != OpAMD64VPSLLQ256const { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) + // result: (VPSRLQMasked256const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ256const { break } - v.reset(OpAMD64VPMOVUSQBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) + // result: (VPSRAQMasked256const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQD128_128 { + if v_0.Op != OpAMD64VPSRAQ256const { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked256const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU64Masked512 (VPABSQ512 x) mask) + // result: (VPABSQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPABSQ512 { break } - v.reset(OpAMD64VPMOVUSQDMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VADDPD512 x y) mask) + // result: (VADDPDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQW128_128 { + if v_0.Op != OpAMD64VADDPD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVUSQWMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VADDPDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPADDQ512 x y) mask) + // result: (VPADDQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB128_128 { + if v_0.Op != OpAMD64VPADDQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPANDQ512 x y) mask) + // result: (VPANDQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPANDQ512 { break } - v.reset(OpAMD64VPMOVUSWBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPANDNQ512 x y) mask) + // result: (VPANDNQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVWB128_128 { + if v_0.Op != OpAMD64VPANDNQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDNQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) + // result: (VBROADCASTSDMasked512 x mask) + for { + if v_0.Op != OpAMD64VBROADCASTSD512 { break } - v.reset(OpAMD64VPMOVWBMasked128_128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := 
v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) + // result: (VPBROADCASTQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBD128 { + if v_0.Op != OpAMD64VPBROADCASTQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) + // result: (VRNDSCALEPDMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VRNDSCALEPD512 { break } - v.reset(OpAMD64VPMOVZXBDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) + // result: (VREDUCEPDMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBD256 { + if v_0.Op != OpAMD64VREDUCEPD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) + // result: (VPERMI2PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2PD512 { break } - 
v.reset(OpAMD64VPMOVZXBDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PDMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) + // result: (VPERMI2QMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBD512 { + if v_0.Op != OpAMD64VPERMI2Q512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2QMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VCVTQQ2PD512 x) mask) + // result: (VCVTQQ2PDMasked512 x mask) + for { + if v_0.Op != OpAMD64VCVTQQ2PD512 { break } - v.reset(OpAMD64VPMOVZXBDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTQQ2PDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VCVTUQQ2PD512 x) mask) + // result: (VCVTUQQ2PDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ128 { + if v_0.Op != OpAMD64VCVTUQQ2PD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTUQQ2PDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VCVTTPD2QQ512 x) mask) + // result: (VCVTTPD2QQMasked512 x mask) + for { 
+ if v_0.Op != OpAMD64VCVTTPD2QQ512 { break } - v.reset(OpAMD64VPMOVZXBQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2QQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VCVTTPD2UQQ512 x) mask) + // result: (VCVTTPD2UQQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ256 { + if v_0.Op != OpAMD64VCVTTPD2UQQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPD2UQQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) + // result: (VDIVPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VDIVPD512 { break } - v.reset(OpAMD64VPMOVZXBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VPLZCNTQ512 x) mask) + // result: (VPLZCNTQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ512 { + if v_0.Op != OpAMD64VPLZCNTQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VMAXPD512 x y) mask) + // result: (VMAXPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMAXPD512 { 
break } - v.reset(OpAMD64VPMOVZXBQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VPMAXSQ512 x y) mask) + // result: (VPMAXSQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW128 { + if v_0.Op != OpAMD64VPMAXSQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMAXUQ512 x y) mask) + // result: (VPMAXUQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXUQ512 { break } - v.reset(OpAMD64VPMOVZXBWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VMINPD512 x y) mask) + // result: (VMINPDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW256 { + if v_0.Op != OpAMD64VMINPD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMINSQ512 x y) mask) + // result: (VPMINSQMasked512 x y mask) + for { + if v_0.Op != 
OpAMD64VPMINSQ512 { break } - v.reset(OpAMD64VPMOVZXBWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPMINUQ512 x y) mask) + // result: (VPMINUQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXDQ128 { + if v_0.Op != OpAMD64VPMINUQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VFMADD213PD512 x y z) mask) + // result: (VFMADD213PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMADD213PD512 { break } - v.reset(OpAMD64VPMOVZXDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PDMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VFMADDSUB213PD512 x y z) mask) + // result: (VFMADDSUB213PDMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXDQ256 { + if v_0.Op != OpAMD64VFMADDSUB213PD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + 
// match: (VMOVDQU64Masked512 (VMULPD512 x y) mask) + // result: (VMULPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMULPD512 { break } - v.reset(OpAMD64VPMOVZXDQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPMULLQ512 x y) mask) + // result: (VPMULLQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWD128 { + if v_0.Op != OpAMD64VPMULLQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VFMSUBADD213PD512 x y z) mask) + // result: (VFMSUBADD213PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMSUBADD213PD512 { break } - v.reset(OpAMD64VPMOVZXWDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PDMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPOPCNTQ512 x) mask) + // result: (VPOPCNTQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWD256 { + if v_0.Op != OpAMD64VPOPCNTQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPOPCNTQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPORQ512 x y) mask) + // result: (VPORQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPORQ512 { break } - v.reset(OpAMD64VPMOVZXWDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPORQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPERMPD512 x y) mask) + // result: (VPERMPDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ128 { + if v_0.Op != OpAMD64VPERMPD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPERMQ512 x y) mask) + // result: (VPERMQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMQ512 { break } - v.reset(OpAMD64VPMOVZXWQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VRCP14PD512 x) mask) + // result: (VRCP14PDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ256 { + if v_0.Op != OpAMD64VRCP14PD512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VRCP14PDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VRSQRT14PD512 x) mask) + // result: (VRSQRT14PDMasked512 x mask) + for { + if v_0.Op != OpAMD64VRSQRT14PD512 { break } - v.reset(OpAMD64VPMOVZXWQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRSQRT14PDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPROLQ512 [a] x) mask) + // result: (VPROLQMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ512 { + if v_0.Op != OpAMD64VPROLQ512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVZXWQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPRORQ512 [a] x) mask) + // result: (VPRORQMasked512 [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULHUW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPRORQ512 { break } - v.reset(OpAMD64VPMULHUWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPRORQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMULHW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPROLVQ512 x y) mask) + // result: (VPROLVQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULHW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPROLVQ512 { break } - v.reset(OpAMD64VPMULHWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMULLD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPRORVQ512 x y) mask) + // result: (VPRORVQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPRORVQ512 { break } - v.reset(OpAMD64VPMULLDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLQ128 { - break - } - y := v_1.Args[1] - x := 
v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSQB128_512 { break } - v.reset(OpAMD64VPMULLQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSQW128_512 { break } - v.reset(OpAMD64VPMULLWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVUSQB128_512 { break } - v.reset(OpAMD64VPOPCNTBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // 
result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) + // result: (VPMOVUSQWMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVUSQW128_512 { break } - v.reset(OpAMD64VPOPCNTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VSCALEFPD512 x y) mask) + // result: (VSCALEFPDMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VSCALEFPD512 { break } - v.reset(OpAMD64VPOPCNTQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU64Masked512 (VPSHLDQ512 [a] x y) mask) + // result: (VPSHLDQMasked512 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHLDQ512 { break } - v.reset(OpAMD64VPOPCNTWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, 
x, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPSLLQ512 x y) mask) + // result: (VPSLLQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLD128 { + if v_0.Op != OpAMD64VPSLLQ512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHRDQ512 [a] x y) mask) + // result: (VPSHRDQMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHRDQ512 { break } - v.reset(OpAMD64VPROLDMasked128Merging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDQMasked512) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VPSRAQ512 x y) mask) + // result: (VPSRAQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLQ128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSRAQ512 { break } - v.reset(OpAMD64VPROLQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x 
:= v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPROLVD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPSRLQ512 x y) mask) + // result: (VPSRLQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLVD128 { + if v_0.Op != OpAMD64VPSRLQ512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHLDVQ512 x y z) mask) + // result: (VPSHLDVQMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHLDVQ512 { break } - v.reset(OpAMD64VPROLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVQMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VPSLLVQ512 x y) mask) + // result: (VPSLLVQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPROLVQ128 { + if v_0.Op != OpAMD64VPSLLVQ512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHRDVQ512 x y z) mask) + // result: (VPSHRDVQMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHRDVQ512 { break } - v.reset(OpAMD64VPROLVQMasked128Merging) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVQMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPSRAVQ512 x y) mask) + // result: (VPSRAVQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORD128 { + if v_0.Op != OpAMD64VPSRAVQ512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRLVQ512 x y) mask) + // result: (VPSRLVQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLVQ512 { break } - v.reset(OpAMD64VPRORDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VSQRTPD512 x) mask) + // result: (VSQRTPDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORQ128 { + if v_0.Op != OpAMD64VSQRTPD512 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VSUBPD512 x y) mask) + // result: (VSUBPDMasked512 x y mask) + for { + if 
v_0.Op != OpAMD64VSUBPD512 { break } - v.reset(OpAMD64VPRORQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPDMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPSUBQ512 x y) mask) + // result: (VPSUBQMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORVD128 { + if v_0.Op != OpAMD64VPSUBQ512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_512 { break } - v.reset(OpAMD64VPRORVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPRORVQ128 { + if v_0.Op != OpAMD64VPMOVQW128_512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPXORQ512 x y) 
mask) + // result: (VPXORQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPXORQ512 { break } - v.reset(OpAMD64VPRORVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPXORQMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) + // result: (VPSLLQMasked512const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHLDD128 { + if v_0.Op != OpAMD64VPSLLQ512const { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ512const { break } - v.reset(OpAMD64VPSHLDDMasked128Merging) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked512const) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) + // result: (VPSRAQMasked512const [a] x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ128 { + if v_0.Op != OpAMD64VPSRAQ512const { break } - a := auxIntToUint8(v_1.AuxInt) - y := 
v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHLDQMasked128Merging) + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked512const) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU8Masked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU8Masked128 (VPABSB128 x) mask) + // result: (VPABSBMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHLDW128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPABSB128 { break } - v.reset(OpAMD64VPSHLDWMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSBMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VPADDB128 x y) mask) + // result: (VPADDBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHRDD128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDB128 { break } - v.reset(OpAMD64VPSHRDDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VPADDSB128 x y) mask) + // result: (VPADDSBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDSB128 { break } - v.reset(OpAMD64VPSHRDQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDSBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPADDUSB128 x y) mask) + // result: (VPADDUSBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHRDW128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDUSB128 { break } - v.reset(OpAMD64VPSHRDWMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDUSBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU8Masked128 (VPAVGB128 x y) mask) + // result: (VPAVGBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPAVGB128 { break } - v.reset(OpAMD64VPSHUFBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPAVGBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) + // result: (VPBROADCASTBMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFD128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPBROADCASTB128 { break } - v.reset(OpAMD64VPSHUFDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTBMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPERMI2B128 x y z) mask) + // result: (VPERMI2BMasked128 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMI2B128 { break } - v.reset(OpAMD64VPSHUFHWMasked128Merging) - 
v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2BMasked128) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPSHUFLW128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFLWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPALIGNR128 [a] x y) mask) + // result: (VPALIGNRMasked128 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSHUFLW128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPALIGNR128 { break } - v.reset(OpAMD64VPSHUFLWMasked128Merging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPALIGNRMasked128) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VPMOVSXBQ128 x) mask) + // result: (VPMOVSXBQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLD128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBQ128 { break } - v.reset(OpAMD64VPSLLDMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) 
mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VPMOVZXBQ128 x) mask) + // result: (VPMOVZXBQMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLQ128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVZXBQ128 { break } - v.reset(OpAMD64VPSLLQMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) + // result: (VPMOVSXBDMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLVD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBD128 { break } - v.reset(OpAMD64VPSLLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBDMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) + // result: (VPMOVZXBDMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVZXBD128 { break } - 
v.reset(OpAMD64VPSLLVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBDMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPMOVSXBW128 x) mask) + // result: (VPMOVSXBWMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLVW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBW128 { break } - v.reset(OpAMD64VPSLLVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBWMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPMOVZXBW128 x) mask) + // result: (VPMOVZXBWMasked128 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSLLW128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVZXBW128 { break } - v.reset(OpAMD64VPSLLWMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBWMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) + 
// match: (VMOVDQU8Masked128 (VGF2P8AFFINEINVQB128 [a] x y) mask) + // result: (VGF2P8AFFINEINVQBMasked128 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAD128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VGF2P8AFFINEINVQB128 { break } - v.reset(OpAMD64VPSRADMasked128constMerging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VGF2P8AFFINEQB128 [a] x y) mask) + // result: (VGF2P8AFFINEQBMasked128 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAQ128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VGF2P8AFFINEQB128 { break } - v.reset(OpAMD64VPSRAQMasked128constMerging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEQBMasked128) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VGF2P8MULB128 x y) mask) + // result: (VGF2P8MULBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAVD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 
- if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VGF2P8MULB128 { break } - v.reset(OpAMD64VPSRAVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8MULBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VPMAXSB128 x y) mask) + // result: (VPMAXSBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMAXSB128 { break } - v.reset(OpAMD64VPSRAVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPMAXUB128 x y) mask) + // result: (VPMAXUBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAVW128 { + if v_0.Op != OpAMD64VPMAXUB128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUBMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked128 (VPMINSB128 x y) mask) + // result: (VPMINSBMasked128 x y mask) + for { + if v_0.Op != OpAMD64VPMINSB128 { break } - v.reset(OpAMD64VPSRAVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) 
- v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPMINUB128 x y) mask) + // result: (VPMINUBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRAW128const { + if v_0.Op != OpAMD64VPMINUB128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUBMasked128) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked128 (VPOPCNTB128 x) mask) + // result: (VPOPCNTBMasked128 x mask) + for { + if v_0.Op != OpAMD64VPOPCNTB128 { break } - v.reset(OpAMD64VPSRAWMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked128) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VPERMB128 x y) mask) + // result: (VPERMBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLD128const { + if v_0.Op != OpAMD64VPERMB128 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRLDMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPERMBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VPSHUFB128 x y) mask) + // result: (VPSHUFBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLQ128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHUFB128 { break } - v.reset(OpAMD64VPSRLQMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked128 (VPSUBB128 x y) mask) + // result: (VPSUBBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLVD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBB128 { break } - v.reset(OpAMD64VPSRLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked128 (VPSUBSB128 x y) mask) + // result: (VPSUBSBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ128 { - break - } - y := 
v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBSB128 { break } - v.reset(OpAMD64VPSRLVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBSBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked128 (VPSUBUSB128 x y) mask) + // result: (VPSUBUSBMasked128 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLVW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBUSB128 { break } - v.reset(OpAMD64VPSRLVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBUSBMasked128) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU8Masked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU8Masked256 (VPABSB256 x) mask) + // result: (VPABSBMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSRLW128const { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPABSB256 { break } - v.reset(OpAMD64VPSRLWMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) 
+ x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSBMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU8Masked256 (VPADDB256 x y) mask) + // result: (VPADDBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDB256 { break } - v.reset(OpAMD64VPSUBBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked256 (VPADDSB256 x y) mask) + // result: (VPADDSBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDSB256 { break } - v.reset(OpAMD64VPSUBDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDSBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPADDUSB256 x y) mask) + // result: (VPADDUSBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBQ128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := 
v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDUSB256 { break } - v.reset(OpAMD64VPSUBQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDUSBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU8Masked256 (VPAVGB256 x y) mask) + // result: (VPAVGBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBSB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPAVGB256 { break } - v.reset(OpAMD64VPSUBSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPAVGBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) + // result: (VPBROADCASTBMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBSW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPBROADCASTB256 { break } - v.reset(OpAMD64VPSUBSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTBMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask) - // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // match: (VMOVDQU8Masked256 (VPERMI2B256 x y z) mask) + // result: (VPERMI2BMasked256 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBUSB128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMI2B256 { break } - v.reset(OpAMD64VPSUBUSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2BMasked256) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked256 (VPALIGNR256 [a] x y) mask) + // result: (VPALIGNRMasked256 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPALIGNR256 { break } - v.reset(OpAMD64VPSUBUSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPALIGNRMasked256) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // match: (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) + // result: (VPMOVSXBQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBW128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBQ256 { break } - v.reset(OpAMD64VPSUBWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBQMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPMOVZXBQ256 x) mask) + // result: (VPMOVZXBQMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRCP14PD128 { + if v_0.Op != OpAMD64VPMOVZXBQ256 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPMOVSXBD256 x) mask) + // result: (VPMOVSXBDMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBD256 { break } - v.reset(OpAMD64VRCP14PDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBDMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPMOVZXBD256 x) mask) + // result: (VPMOVZXBDMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD128 { + if v_0.Op != OpAMD64VPMOVZXBD256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBDMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) + // result: 
(VPMOVSXBWMasked256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBW256 { break } - v.reset(OpAMD64VREDUCEPDMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBWMasked256) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) + // result: (VPMOVZXBWMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS128 { + if v_0.Op != OpAMD64VPMOVZXBW256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBWMasked256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked256 (VGF2P8AFFINEINVQB256 [a] x y) mask) + // result: (VGF2P8AFFINEINVQBMasked256 [a] x y mask) + for { + if v_0.Op != OpAMD64VGF2P8AFFINEINVQB256 { break } - v.reset(OpAMD64VREDUCEPSMasked128Merging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VGF2P8AFFINEQB256 [a] x y) mask) + // result: (VGF2P8AFFINEQBMasked256 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD128 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) 
{ + if v_0.Op != OpAMD64VGF2P8AFFINEQB256 { break } - v.reset(OpAMD64VRNDSCALEPDMasked128Merging) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEQBMasked256) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked256 (VGF2P8MULB256 x y) mask) + // result: (VGF2P8MULBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS128 { + if v_0.Op != OpAMD64VGF2P8MULB256 { break } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8MULBMasked256) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPMAXSB256 x y) mask) + // result: (VPMAXSBMasked256 x y mask) + for { + if v_0.Op != OpAMD64VPMAXSB256 { break } - v.reset(OpAMD64VRNDSCALEPSMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPMAXUB256 x y) mask) + // result: (VPMAXUBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PD128 { + if v_0.Op != OpAMD64VPMAXUB256 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + 
mask := v_1 + v.reset(OpAMD64VPMAXUBMasked256) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPMINSB256 x y) mask) + // result: (VPMINSBMasked256 x y mask) + for { + if v_0.Op != OpAMD64VPMINSB256 { break } - v.reset(OpAMD64VRSQRT14PDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VSCALEFPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPMINUB256 x y) mask) + // result: (VPMINUBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD128 { + if v_0.Op != OpAMD64VPMINUB256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VSCALEFPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked256 (VPOPCNTB256 x) mask) + // result: (VPOPCNTBMasked256 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSCALEFPS128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPOPCNTB256 { break } - v.reset(OpAMD64VSCALEFPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked256) 
+ v.AddArg2(x, mask) return true } - // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPERMB256 x y) mask) + // result: (VPERMBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSQRTPD128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMB256 { break } - v.reset(OpAMD64VSQRTPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VSQRTPS128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked256 (VPSHUFB256 x y) mask) + // result: (VPSHUFBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSQRTPS128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHUFB256 { break } - v.reset(OpAMD64VSQRTPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // match: (VMOVDQU8Masked256 (VPSUBB256 x y) mask) + // result: (VPSUBBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSUBPD128 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBB256 { break } - 
v.reset(OpAMD64VSUBPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBBMasked256) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // match: (VMOVDQU8Masked256 (VPSUBSB256 x y) mask) + // result: (VPSUBSBMasked256 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VSUBPS128 { + if v_0.Op != OpAMD64VPSUBSB256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBSBMasked256) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked256 (VPSUBUSB256 x y) mask) + // result: (VPSUBUSBMasked256 x y mask) + for { + if v_0.Op != OpAMD64VPSUBUSB256 { break } - v.reset(OpAMD64VSUBPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBUSBMasked256) + v.AddArg3(x, y, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VPBLENDVB256 dst (VADDPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPABSB512 x) mask) + // result: (VPABSBMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VADDPD256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPABSB512 { break } - 
v.reset(OpAMD64VADDPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSBMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VADDPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPADDB512 x y) mask) + // result: (VPADDBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VADDPS256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDB512 { break } - v.reset(OpAMD64VADDPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTDQ2PD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTDQ2PDMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPADDSB512 x y) mask) + // result: (VPADDSBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTDQ2PD512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDSB512 { break } - v.reset(OpAMD64VCVTDQ2PDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDSBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTDQ2PS256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTDQ2PSMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPADDUSB512 x y) mask) + // result: 
(VPADDUSBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTDQ2PS256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPADDUSB512 { break } - v.reset(OpAMD64VCVTDQ2PSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDUSBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTPD2PSY128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPD2PSYMasked128Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPAVGB512 x y) mask) + // result: (VPAVGBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTPD2PSY128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPAVGB512 { break } - v.reset(OpAMD64VCVTPD2PSYMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPAVGBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTPS2PD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPS2PDMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) + // result: (VPBROADCASTBMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTPS2PD512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPBROADCASTB512 { break } - v.reset(OpAMD64VCVTPS2PDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTBMasked512) + v.AddArg2(x, mask) return true } - // 
match: (VPBLENDVB256 dst (VCVTQQ2PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTQQ2PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPERMI2B512 x y z) mask) + // result: (VPERMI2BMasked512 x y z mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTQQ2PD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMI2B512 { break } - v.reset(OpAMD64VCVTQQ2PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2BMasked512) + v.AddArg4(x, y, z, mask) return true } - // match: (VPBLENDVB256 dst (VCVTQQ2PSY128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTQQ2PSYMasked128Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPALIGNR512 [a] x y) mask) + // result: (VPALIGNRMasked512 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTQQ2PSY128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPALIGNR512 { break } - v.reset(OpAMD64VCVTQQ2PSYMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPALIGNRMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPD2DQY128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2DQYMasked128Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) + // result: (VPMOVSXBQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2DQY128 { - break - } - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBQ512 { break } - v.reset(OpAMD64VCVTTPD2DQYMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPD2QQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2QQMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) + // result: (VPMOVZXBQMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2QQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVZXBQ512 { break } - v.reset(OpAMD64VCVTTPD2QQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPD2UDQY128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2UDQYMasked128Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) + // result: (VPMOVSXBWMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2UDQY128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBW512 { break } - v.reset(OpAMD64VCVTTPD2UDQYMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBWMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPD2UQQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPD2UQQMasked256Merging dst x (VPMOVVec64x4ToM 
mask)) + // match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) + // result: (VPMOVSXBDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPD2UQQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVSXBD512 { break } - v.reset(OpAMD64VCVTTPD2UQQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBDMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) + // result: (VPMOVZXBWMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVZXBW512 { break } - v.reset(OpAMD64VCVTTPS2DQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBWMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPS2QQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2QQMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) + // result: (VPMOVZXBDMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2QQ512 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMOVZXBD512 { break } - v.reset(OpAMD64VCVTTPS2QQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBDMasked512) + 
v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPS2UDQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) + // result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2UDQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VGF2P8AFFINEINVQB512 { break } - v.reset(OpAMD64VCVTTPS2UDQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTTPS2UQQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTTPS2UQQMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VGF2P8AFFINEQB512 [a] x y) mask) + // result: (VGF2P8AFFINEQBMasked512 [a] x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2UQQ512 { + if v_0.Op != OpAMD64VGF2P8AFFINEQB512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEQBMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VGF2P8MULB512 x y) mask) + // result: (VGF2P8MULBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VGF2P8MULB512 { break } - v.reset(OpAMD64VCVTTPS2UQQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8MULBMasked512) + 
v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTUDQ2PD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUDQ2PDMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPMAXSB512 x y) mask) + // result: (VPMAXSBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUDQ2PD512 { + if v_0.Op != OpAMD64VPMAXSB512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VCVTUDQ2PDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTUDQ2PS256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUDQ2PSMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPMAXUB512 x y) mask) + // result: (VPMAXUBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUDQ2PS256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMAXUB512 { break } - v.reset(OpAMD64VCVTUDQ2PSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTUQQ2PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUQQ2PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPMINSB512 x y) mask) + // result: (VPMINSBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUQQ2PD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMINSB512 { 
break } - v.reset(OpAMD64VCVTUQQ2PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VCVTUQQ2PSY128 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTUQQ2PSYMasked128Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPMINUB512 x y) mask) + // result: (VPMINUBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VCVTUQQ2PSY128 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPMINUB512 { break } - v.reset(OpAMD64VCVTUQQ2PSYMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPOPCNTB512 x) mask) + // result: (VPOPCNTBMasked512 x mask) for { - dst := v_0 - if v_1.Op != OpAMD64VDIVPD256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPOPCNTB512 { break } - v.reset(OpAMD64VDIVPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked512) + v.AddArg2(x, mask) return true } - // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPERMB512 x y) 
mask) + // result: (VPERMBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VDIVPS256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPERMB512 { break } - v.reset(OpAMD64VDIVPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // match: (VMOVDQU8Masked512 (VPSHUFB512 x y) mask) + // result: (VPSHUFBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VGF2P8MULB256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSHUFB512 { break } - v.reset(OpAMD64VGF2P8MULBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VMOVDQU8Masked512 (VPSUBB512 x y) mask) + // result: (VPSUBBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMAXPD256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if v_0.Op != OpAMD64VPSUBB512 { break } - v.reset(OpAMD64VMAXPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
v.reset(OpAMD64VPSUBBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQU8Masked512 (VPSUBSB512 x y) mask) + // result: (VPSUBSBMasked512 x y mask) for { - dst := v_0 - if v_1.Op != OpAMD64VMAXPS256 { + if v_0.Op != OpAMD64VPSUBSB512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBSBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPSUBUSB512 x y) mask) + // result: (VPSUBUSBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBUSB512 { break } - v.reset(OpAMD64VMAXPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBUSBMasked512) + v.AddArg3(x, y, mask) return true } - // match: (VPBLENDVB256 dst (VMINPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQUload128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQUload128 [off1] {sym} x:(ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (VMOVDQUload128 [off1+off2] {sym} ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VMINPD256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64ADDQconst { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + ptr := x.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VMINPDMasked256Merging) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUload128) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VMINPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQUload128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (VMOVDQUload128 [off1+off2] {mergeSym(sym1, sym2)} base mem) for { - dst := v_0 - if v_1.Op != OpAMD64VMINPS256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64LEAQ { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + sym2 := auxToSym(x.Aux) + base := x.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VMINPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUload128) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (VPBLENDVB256 dst (VMULPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQUload256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQUload256 [off1] {sym} x:(ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (VMOVDQUload256 [off1+off2] {sym} ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VMULPD256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64ADDQconst { break } - y 
:= v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + ptr := x.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VMULPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUload256) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VMULPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQUload256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (VMOVDQUload256 [off1+off2] {mergeSym(sym1, sym2)} base mem) for { - dst := v_0 - if v_1.Op != OpAMD64VMULPS256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64LEAQ { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + sym2 := auxToSym(x.Aux) + base := x.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VMULPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUload256) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (VPBLENDVB256 dst (VPABSB256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQUload512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQUload512 [off1] {sym} x:(ADDQconst [off2] ptr) mem) + 
// cond: is32Bit(int64(off1)+int64(off2)) + // result: (VMOVDQUload512 [off1+off2] {sym} ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSB256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64ADDQconst { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + ptr := x.Args[0] + mem := v_1 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VPABSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VMOVDQUload512) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPABSD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VMOVDQUload512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (VMOVDQUload512 [off1+off2] {mergeSym(sym1, sym2)} base mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSD256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64LEAQ { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + sym2 := auxToSym(x.Aux) + base := x.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VPABSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VMOVDQUload512) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (VPBLENDVB256 dst (VPABSQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSQMasked256Merging dst x 
(VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQUstore128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQUstore128 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (VMOVDQUstore128 [off1+off2] {sym} ptr val mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSQ256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64ADDQconst { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + ptr := x.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VPABSQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VMOVDQUstore128) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (VPBLENDVB256 dst (VPABSW256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM mask)) + // match: (VMOVDQUstore128 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (VMOVDQUstore128 [off1+off2] {mergeSym(sym1, sym2)} base val mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPABSW256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64LEAQ { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + sym2 := auxToSym(x.Aux) + base := x.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VPABSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + 
v.reset(OpAMD64VMOVDQUstore128) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQUstore256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQUstore256 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (VMOVDQUstore256 [off1+off2] {sym} ptr val mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64ADDQconst { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + ptr := x.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VPACKSSDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUstore256) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQUstore256 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (VMOVDQUstore256 [off1+off2] {mergeSym(sym1, sym2)} base val mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64LEAQ { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + sym2 := auxToSym(x.Aux) + base := x.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VPACKUSDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUstore256) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (VPBLENDVB256 dst (VPADDB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQUstore512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQUstore512 [off1] {sym} x:(ADDQconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (VMOVDQUstore512 [off1+off2] {sym} ptr val mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDB256 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64ADDQconst { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + ptr := x.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64VPADDBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUstore512) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) return true } - // match: (VPBLENDVB256 dst (VPADDD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VMOVDQUstore512 [off1] {sym1} x:(LEAQ [off2] {sym2} base) val mem) + // cond: 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (VMOVDQUstore512 [off1+off2] {mergeSym(sym1, sym2)} base val mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDD256 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + x := v_0 + if x.Op != OpAMD64LEAQ { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off2 := auxIntToInt32(x.AuxInt) + sym2 := auxToSym(x.Aux) + base := x.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64VPADDDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VMOVDQUstore512) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (VPBLENDVB256 dst (VPADDQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VMOVQ x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (VMOVQload [off] {sym} ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDQ256 { + x := v_0 + if x.Op != OpAMD64MOVQload { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64VPADDQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64VMOVQload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - 
// match: (VPBLENDVB256 dst (VPADDSB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVSDf2v(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (VMOVSDload [off] {sym} ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDSB256 { + x := v_0 + if x.Op != OpAMD64MOVSDload { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64VPADDSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64VMOVSDload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VMOVSDf2v x:(MOVSDconst [c] )) + // result: (VMOVSDconst [c] ) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDSW256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0 + if x.Op != OpAMD64MOVSDconst { break } - v.reset(OpAMD64VPADDSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + c := auxIntToFloat64(x.AuxInt) + v.reset(OpAMD64VMOVSDconst) + v.AuxInt = float64ToAuxInt(c) return true } - // match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSBMasked256Merging 
dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMOVSSf2v(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (VMOVSSload [off] {sym} ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDUSB256 { + x := v_0 + if x.Op != OpAMD64MOVSSload { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(x.AuxInt) + sym := auxToSym(x.Aux) + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64VPADDUSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64VMOVSSload, v.Type) + v.copyOf(v0) + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) + v0.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VMOVSSf2v x:(MOVSSconst [c] )) + // result: (VMOVSSconst [c] ) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDUSW256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + x := v_0 + if x.Op != OpAMD64MOVSSconst { break } - v.reset(OpAMD64VPADDUSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + c := auxIntToFloat32(x.AuxInt) + v.reset(OpAMD64VMOVSSconst) + v.AuxInt = float32ToAuxInt(c) return true } - // match: (VPBLENDVB256 dst (VPADDW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := 
v.Args[0] + // match: (VMULPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPD512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPADDW256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64VPADDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPALIGNR256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPALIGNRMasked256Merging dst [a] x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPDMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPALIGNR256 { - break - } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + 
v.reset(OpAMD64VMULPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPALIGNRMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPAVGB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPDMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPAVGB256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPAVGBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPDMasked512 x 
l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPDMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPAVGW256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPAVGWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPS512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - 
v.reset(OpAMD64VPLZCNTDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPLZCNTQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPSMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPLZCNTQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VMULPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPSMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != 
OpAMD64VPMADDUBSW256 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMADDUBSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true - } - // match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMADDWD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPSMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMADDWDMasked256Merging) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSD512load {sym} [off] ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSB256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSDMasked128load {sym} [off] ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) 
&& clobber(l)) { break } - v.reset(OpAMD64VPMAXSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSDMasked256load {sym} [off] ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXSQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSDMasked512load {sym} [off] ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXSW256 { + l := v_0 + if 
l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQ128load {sym} [off] ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUB256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXUBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + 
// cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQ256load {sym} [off] ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXUDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQ512load {sym} [off] ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXUQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func 
rewriteValueAMD64_OpAMD64VPABSQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQMasked128load {sym} [off] ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMAXUW256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMAXUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMINSB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQMasked256load {sym} [off] ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSB256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSQMasked256load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMINSD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPABSQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQMasked512load {sym} [off] ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSD256 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDW512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSQ256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINSQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKSSDW512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSW256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_1.Args[1] - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKSSDWMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMINUB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDWMasked256load 
{sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUB256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_1.Args[1] - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINUBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKSSDWMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMINUD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDWMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUD256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINUDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKSSDWMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDW512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUQ256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINUQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKUSDW512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMINUW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMINUW256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_1.Args[1] - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMINUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKUSDWMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVDB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDWMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVDB128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVDBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPACKUSDWMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVDW128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDWMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVDW128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := 
v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVDWMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPACKUSDWMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVQB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDD512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQB128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64VPMOVQBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVQD128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDDMasked128(v *Value) bool { + 
v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDDMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQD128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVQDMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVQW128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDDMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVQW128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + 
v.reset(OpAMD64VPADDDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVQWMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSDB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDDMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSDB128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVSDBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSDW128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) 
&& clobber(l) + // result: (VPADDQ512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSDW128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64VPMOVSDWMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSQB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQB128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVSQBMasked128_256Merging) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSQD128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVSQDMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSQW128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPADDQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSQW128_256 { - break - } - x := 
v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVSQWMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSWB128_256 x) mask) + return false +} +func rewriteValueAMD64_OpAMD64VPAND128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPAND128 x (VPMOVMToVec8x16 k)) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) + // result: (VMOVDQU8Masked128 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSWB128_256 { - break - } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMOVSWBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec8x16 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU8Masked128) + v.AddArg2(x, k) + return true + } + break } - // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) + // match: (VPAND128 x (VPMOVMToVec16x8 k)) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) + // result: (VMOVDQU16Masked128 x k) for { - dst := v_0 - if 
v_1.Op != OpAMD64VPMOVSXBW512 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec16x8 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU16Masked128) + v.AddArg2(x, k) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + // match: (VPAND128 x (VPMOVMToVec32x4 k)) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMOVDQU32Masked128 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec32x4 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU32Masked128) + v.AddArg2(x, k) + return true } - v.reset(OpAMD64VPMOVSXBWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) + // match: (VPAND128 x (VPMOVMToVec64x2 k)) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // result: (VMOVDQU64Masked128 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXDQ512 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec64x2 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU64Masked128) + v.AddArg2(x, k) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPAND256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPAND256 x (VPMOVMToVec8x32 k)) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMOVDQU8Masked256 x k) + for { + 
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec8x32 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU8Masked256) + v.AddArg2(x, k) + return true } - v.reset(OpAMD64VPMOVSXDQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) + // match: (VPAND256 x (VPMOVMToVec16x16 k)) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) + // result: (VMOVDQU16Masked256 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWD512 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec16x16 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU16Masked256) + v.AddArg2(x, k) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + // match: (VPAND256 x (VPMOVMToVec32x8 k)) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMOVDQU32Masked256 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec32x8 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU32Masked256) + v.AddArg2(x, k) + return true } - v.reset(OpAMD64VPMOVSXWDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) + // match: (VPAND256 x (VPMOVMToVec64x4 k)) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM 
mask)) + // result: (VMOVDQU64Masked256 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128_256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec64x4 { + continue + } + k := v_1.Args[0] + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + continue + } + v.reset(OpAMD64VMOVDQU64Masked256) + v.AddArg2(x, k) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDD512 x (VPMOVMToVec64x8 k)) + // result: (VMOVDQU64Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU64Masked512) + v.AddArg2(x, k) + return true } - v.reset(OpAMD64VPMOVUSDBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VPANDD512 x (VPMOVMToVec32x16 k)) + // result: (VMOVDQU32Masked512 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDW128_256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU32Masked512) + v.AddArg2(x, k) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + // match: (VPANDD512 x (VPMOVMToVec16x32 k)) + // result: (VMOVDQU16Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec16x32 { + continue + } + k := 
v_1.Args[0] + v.reset(OpAMD64VMOVDQU16Masked512) + v.AddArg2(x, k) + return true } - v.reset(OpAMD64VPMOVUSDWMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VPANDD512 x (VPMOVMToVec8x64 k)) + // result: (VMOVDQU8Masked512 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128_256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec8x64 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU8Masked512) + v.AddArg2(x, k) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + // match: (VPANDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64VPMOVUSQBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := 
v.Args[0] + // match: (VPANDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDDMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQD128_256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPMOVUSQDMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) + return false +} +func 
rewriteValueAMD64_OpAMD64VPANDDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDDMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQW128_256 { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDND512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDND512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDND512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVUSQWMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDND512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDNDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := 
v.Args[0] + // match: (VPANDNDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNDMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVUSWBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDNDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDNDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNDMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVWB128_256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVWBMasked128_256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDNDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return 
true } - // match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW512 { + return false +} +func rewriteValueAMD64_OpAMD64VPANDNDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVZXBWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDNDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQ512load {sym} [off] x ptr mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXDQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - 
v.reset(OpAMD64VPMOVZXDQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDNQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQMasked128load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMOVZXWDMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDNQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQMasked256load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULHUW256 { + x := v_0 + l 
:= v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_1.Args[1] - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMULHUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPANDNQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMULHW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQMasked512load {sym} [off] x ptr mask mem) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULHW256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1.Args[1] - x := v_1.Args[0] + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMULHWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPANDNQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VPBLENDVB256 dst (VPMULLD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPANDQ512(v 
*Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQ512 x (VPMOVMToVec64x8 k)) + // result: (VMOVDQU64Masked512 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLD256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU64Masked512) + v.AddArg2(x, k) + return true } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + // match: (VPANDQ512 x (VPMOVMToVec32x16 k)) + // result: (VMOVDQU32Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU32Masked512) + v.AddArg2(x, k) + return true } - v.reset(OpAMD64VPMULLDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPANDQ512 x (VPMOVMToVec16x32 k)) + // result: (VMOVDQU16Masked512 x k) for { - dst := v_0 - if v_1.Op != OpAMD64VPMULLQ256 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec16x32 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU16Masked512) + v.AddArg2(x, k) + return true } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break + break + } + // match: (VPANDQ512 x (VPMOVMToVec8x64 k)) + // result: (VMOVDQU8Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec8x64 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU8Masked512) + v.AddArg2(x, k) + return true } - 
v.reset(OpAMD64VPMULLQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) - return true + break } - // match: (VPBLENDVB256 dst (VPMULLW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPANDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + 
// result: (VPANDQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) + // result: (VGF2P8MULBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLW256 { + if v_1.Op != OpAMD64VGF2P8MULB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPMULLWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VGF2P8MULBMasked512Merging) + v.AddArg4(dst, x, 
y, mask) return true } - // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask) + // result: (VPABSBMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB256 { + if v_1.Op != OpAMD64VPABSB512 { break } x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPOPCNTBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPABSBMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPADDB512 x y) mask) + // result: (VPADDBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD256 { + if v_1.Op != OpAMD64VPADDB512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPOPCNTDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPADDBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) + // result: (VPADDSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ256 { + if v_1.Op != OpAMD64VPADDSB512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPOPCNTQMasked256Merging) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPADDSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) + // result: (VPADDUSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW256 { + if v_1.Op != OpAMD64VPADDUSB512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPOPCNTWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPADDUSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPALIGNR512 [a] x y) mask) + // result: (VPALIGNRMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLD256 { + if v_1.Op != OpAMD64VPALIGNR512 { break } a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPROLDMasked256Merging) + v.reset(OpAMD64VPALIGNRMasked512Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) + // result: (VPAVGBMasked512Merging dst 
x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLQ256 { + if v_1.Op != OpAMD64VPAVGB512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPROLQMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPAVGBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPROLVD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) + // result: (VPMAXSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVD256 { + if v_1.Op != OpAMD64VPMAXSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPROLVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMAXSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) + // result: (VPMAXUBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVQ256 { + if v_1.Op != OpAMD64VPMAXUB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPROLVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMAXUBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 
dst (VPRORD256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) + // result: (VPMINSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORD256 { + if v_1.Op != OpAMD64VPMINSB512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPRORDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPMINSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) + // result: (VPMINUBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORQ256 { + if v_1.Op != OpAMD64VPMINUB512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPRORQMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPMINUBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPRORVD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) + // result: (VPOPCNTBMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVD256 { + if v_1.Op != OpAMD64VPOPCNTB512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPRORVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPOPCNTBMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) + // result: (VPSHUFBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVQ256 { + if v_1.Op != OpAMD64VPSHUFB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPRORVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPSHUFBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) + // result: (VPSUBBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDD256 { + if v_1.Op != OpAMD64VPSUBB512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHLDDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPSUBBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDQMasked256Merging dst [a] x y 
(VPMOVVec64x4ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) + // result: (VPSUBSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ256 { + if v_1.Op != OpAMD64VPSUBSB512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHLDQMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPSUBSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) + // result: (VPSUBUSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDW256 { + if v_1.Op != OpAMD64VPSUBUSB512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHLDWMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPSUBUSBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask)) + return false +} +func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask) + // result: (VADDPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDD256 { + if v_1.Op != OpAMD64VADDPS512 { break } - a := 
auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHRDDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VADDPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VCVTDQ2PS512 x) mask) + // result: (VCVTDQ2PSMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ256 { + if v_1.Op != OpAMD64VCVTDQ2PS512 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHRDQMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VCVTDQ2PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) + // result: (VCVTTPS2DQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDW256 { + if v_1.Op != OpAMD64VCVTTPS2DQ512 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHRDWMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VCVTTPS2DQMasked512Merging) + v.AddArg3(dst, x, 
mask) return true } - // match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // match: (VPBLENDMDMasked512 dst (VCVTTPS2UDQ512 x) mask) + // result: (VCVTTPS2UDQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFB256 { + if v_1.Op != OpAMD64VCVTTPS2UDQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHUFBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VCVTTPS2UDQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VCVTUDQ2PS512 x) mask) + // result: (VCVTUDQ2PSMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFD256 { + if v_1.Op != OpAMD64VCVTUDQ2PS512 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHUFDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VCVTUDQ2PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) + // result: (VDIVPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW256 { + if v_1.Op != OpAMD64VDIVPS512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := 
v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHUFHWMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VDIVPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSHUFLW256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFLWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) + // result: (VMAXPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFLW256 { + if v_1.Op != OpAMD64VMAXPS512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSHUFLWMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VMAXPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask) + // result: (VMINPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLD256const { + if v_1.Op != OpAMD64VMINPS512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSLLDMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VMINPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst 
(VPSLLQ256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask) + // result: (VMULPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLQ256const { + if v_1.Op != OpAMD64VMULPS512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSLLQMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VMULPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask) + // result: (VPABSDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVD256 { + if v_1.Op != OpAMD64VPABSD512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSLLVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPABSDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) + // result: (VPACKSSDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ256 { + if v_1.Op != OpAMD64VPACKSSDW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } 
- v.reset(OpAMD64VPSLLVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKSSDWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) + // result: (VPACKUSDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVW256 { + if v_1.Op != OpAMD64VPACKUSDW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSLLVWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPACKUSDWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask) + // result: (VPADDDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLW256const { + if v_1.Op != OpAMD64VPADDD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSLLWMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPADDDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) + // match: 
(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) + // result: (VPANDDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAD256const { + if v_1.Op != OpAMD64VPANDD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRADMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPANDDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) + // result: (VPLZCNTDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ256const { + if v_1.Op != OpAMD64VPLZCNTD512 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRAQMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPLZCNTDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) + // result: (VPMAXSDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD256 { + if v_1.Op != OpAMD64VPMAXSD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRAVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMAXSDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) + // result: (VPMAXUDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ256 { + if v_1.Op != OpAMD64VPMAXUD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRAVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMAXUDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) + // result: (VPMINSDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVW256 { + if v_1.Op != OpAMD64VPMINSD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRAVWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMINSDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) + // result: (VPMINUDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAW256const { + if v_1.Op != OpAMD64VPMINUD512 
{ break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRAWMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPMINUDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLD256const { + if v_1.Op != OpAMD64VPMOVDB128_512 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRLDMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPMOVDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) + // result: (VPMOVDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLQ256const { + if v_1.Op != OpAMD64VPMOVDW256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRLQMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPMOVDWMasked256Merging) + v.AddArg3(dst, x, 
mask) return true } - // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVD256 { + if v_1.Op != OpAMD64VPMOVSDB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRLVDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMOVSDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) + // result: (VPMOVSDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ256 { + if v_1.Op != OpAMD64VPMOVSDW256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRLVQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMOVSDWMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVW256 { + if v_1.Op != OpAMD64VPMOVUSDB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - 
break - } - v.reset(OpAMD64VPSRLVWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMOVUSDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) + // result: (VPMOVUSDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLW256const { + if v_1.Op != OpAMD64VPMOVUSDW256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSRLWMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPMOVUSDWMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) + // result: (VPMULLDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBB256 { + if v_1.Op != OpAMD64VPMULLD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPMULLDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 
x) mask) + // result: (VPOPCNTDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBD256 { + if v_1.Op != OpAMD64VPOPCNTD512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPOPCNTDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask) + // result: (VPORDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBQ256 { + if v_1.Op != OpAMD64VPORD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPORDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) + // result: (VPROLDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSB256 { + if v_1.Op != OpAMD64VPROLD512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPROLDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, 
mask) return true } - // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) + // result: (VPROLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSW256 { + if v_1.Op != OpAMD64VPROLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPROLVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) + // result: (VPRORDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSB256 { + if v_1.Op != OpAMD64VPRORD512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBUSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPRORDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) + // result: (VPRORVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW256 { + if v_1.Op != OpAMD64VPRORVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBUSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPRORVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) + // result: (VPSHLDDMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBW256 { + if v_1.Op != OpAMD64VPSHLDD512 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VPSUBWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPSHLDDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) + // result: (VPSHRDDMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PD256 { + if v_1.Op != OpAMD64VPSHRDD512 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VRCP14PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPSHRDDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) - // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) + // result: (VPSHUFDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD256 { + if v_1.Op != OpAMD64VPSHUFD512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VREDUCEPDMasked256Merging) + v.reset(OpAMD64VPSHUFDMasked512Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) + // result: (VPSLLDMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS256 { + if v_1.Op != OpAMD64VPSLLD512const { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VREDUCEPSMasked256Merging) + v.reset(OpAMD64VPSLLDMasked512constMerging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) + // result: (VPSLLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD256 { + if v_1.Op != OpAMD64VPSLLVD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VRNDSCALEPDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.reset(OpAMD64VPSLLVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) + // result: (VPSRADMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS256 { + if v_1.Op != OpAMD64VPSRAD512const { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VRNDSCALEPSMasked256Merging) + v.reset(OpAMD64VPSRADMasked512constMerging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) + // result: (VPSRAVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PD256 { + if v_1.Op != OpAMD64VPSRAVD512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + v.reset(OpAMD64VPSRAVDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD512const { break } - v.reset(OpAMD64VRSQRT14PDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) + // result: (VPSRLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD256 { + if v_1.Op != OpAMD64VPSRLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VSCALEFPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VPSRLVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) + // result: (VPSUBDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPS256 { + if v_1.Op != OpAMD64VPSUBD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + v.reset(OpAMD64VPSUBDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VPXORD512 x y) mask) + // result: (VPXORDMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPXORD512 { break } - v.reset(OpAMD64VSCALEFPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPXORDMasked512Merging) + v.AddArg4(dst, x, y, mask) 
return true } - // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) + // result: (VRCP14PSMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPD256 { + if v_1.Op != OpAMD64VRCP14PS512 { break } x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + v.reset(OpAMD64VRCP14PSMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) + // result: (VREDUCEPSMasked512Merging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VREDUCEPS512 { break } - v.reset(OpAMD64VSQRTPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(dst, x, v0) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VREDUCEPSMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) + // result: (VRNDSCALEPSMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPS256 { + if v_1.Op != OpAMD64VRNDSCALEPS512 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + v.reset(OpAMD64VRNDSCALEPSMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) + // result: (VRSQRT14PSMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PS512 { break } - v.reset(OpAMD64VSQRTPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg3(dst, x, v0) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VRSQRT14PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // match: (VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) + // result: (VSCALEFPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPD256 { + if v_1.Op != OpAMD64VSCALEFPS512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + v.reset(OpAMD64VSCALEFPSMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) + // result: (VSQRTPSMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPS512 { break } - v.reset(OpAMD64VSUBPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VSQRTPSMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask) - // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) + // result: (VSUBPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPS256 { + if v_1.Op != OpAMD64VSUBPS512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { - break - } - v.reset(OpAMD64VSUBPSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.reset(OpAMD64VSUBPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBROADCASTB128(v *Value) bool { - v_0 := v.Args[0] - b 
:= v.Block - // match: (VPBROADCASTB128 x:(VPINSRB128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTB128 (VMOVQ y)) + // match: (VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPBLENDMDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { - break - } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - if !(x.Uses == 1) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPBROADCASTB128) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + v.reset(OpAMD64VPBLENDMDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTB256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTB256 x:(VPINSRB128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTB256 (VMOVQ y)) + // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask) + // result: (VADDPDMasked512Merging dst x y mask) for { - x := v_0 - if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { + dst := v_0 + if v_1.Op != OpAMD64VADDPD512 { break } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VADDPDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VCVTPD2PS256 x) mask) + // result: (VCVTPD2PSMasked256Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTPD2PS256 { break } - if !(x.Uses == 1) { + x := 
v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTPD2PSMasked256Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VCVTQQ2PD512 x) mask) + // result: (VCVTQQ2PDMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTQQ2PD512 { break } - v.reset(OpAMD64VPBROADCASTB256) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTQQ2PDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBROADCASTB512(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTB512 x:(VPINSRB128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTB512 (VMOVQ y)) + // match: (VPBLENDMQMasked512 dst (VCVTQQ2PS256 x) mask) + // result: (VCVTQQ2PSMasked256Merging dst x mask) for { - x := v_0 - if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { + dst := v_0 + if v_1.Op != OpAMD64VCVTQQ2PS256 { break } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTQQ2PSMasked256Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VCVTTPD2DQ256 x) mask) + // result: (VCVTTPD2DQMasked256Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2DQ256 { break } - if !(x.Uses == 1) { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTTPD2DQMasked256Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VCVTTPD2QQ512 x) mask) + // result: (VCVTTPD2QQMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2QQ512 { break } - v.reset(OpAMD64VPBROADCASTB512) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTTPD2QQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPBROADCASTW128(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTW128 x:(VPINSRW128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTW128 (VMOVQ y)) + // match: (VPBLENDMQMasked512 dst (VCVTTPD2UDQ256 x) mask) + // result: (VCVTTPD2UDQMasked256Merging dst x mask) for { - x := v_0 - if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2UDQ256 { break } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTTPD2UDQMasked256Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VCVTTPD2UQQ512 x) mask) + // result: (VCVTTPD2UQQMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2UQQ512 { break } - if !(x.Uses == 1) { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTTPD2UQQMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VCVTUQQ2PD512 x) mask) + // result: (VCVTUQQ2PDMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTUQQ2PD512 { break } - v.reset(OpAMD64VPBROADCASTW128) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VCVTUQQ2PDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBROADCASTW256(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTW256 x:(VPINSRW128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTW256 (VMOVQ y)) + // match: (VPBLENDMQMasked512 dst (VCVTUQQ2PS256 x) mask) + // result: (VCVTUQQ2PSMasked256Merging dst x mask) for { - x := v_0 - if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUQQ2PS256 { break } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + x := v_1.Args[0] + 
mask := v_2 + v.reset(OpAMD64VCVTUQQ2PSMasked256Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) + // result: (VDIVPDMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VDIVPD512 { break } - if !(x.Uses == 1) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VDIVPDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) + // result: (VMAXPDMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VMAXPD512 { break } - v.reset(OpAMD64VPBROADCASTW256) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VMAXPDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBROADCASTW512(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTW512 x:(VPINSRW128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTW512 (VMOVQ y)) + // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask) + // result: (VMINPDMasked512Merging dst x y mask) for { - x := v_0 - if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + dst := v_0 + if v_1.Op != OpAMD64VMINPD512 { break } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VMINPDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask) + // result: (VMULPDMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VMULPD512 { break } - if !(x.Uses == 1) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VMULPDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPABSQ512 x) mask) + // result: (VPABSQMasked512Merging dst x 
mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPABSQ512 { break } - v.reset(OpAMD64VPBROADCASTW512) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPABSQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) + // result: (VPADDQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPADDQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPADDQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) + // result: (VPANDQMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPANDQ512 { break } - v.reset(OpAMD64VPCMPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPANDQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 
dst (VPLZCNTQ512 x) mask) + // result: (VPLZCNTQMasked512Merging dst x mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPLZCNTQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPLZCNTQMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) + // result: (VPMAXSQMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMAXSQ512 { break } - v.reset(OpAMD64VPCMPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMAXSQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) + // result: (VPMAXUQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPMAXUQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) + // result: (VPMINSQMasked512Merging dst x y mask) + for { + dst := v_0 + if 
v_1.Op != OpAMD64VPMINSQ512 { break } - v.reset(OpAMD64VPCMPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMINSQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) + // result: (VPMINUQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPMINUQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVQB128_512 { break } - v.reset(OpAMD64VPCMPDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVQBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPEQD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPEQD512load {sym} [off] x ptr mem) + // match: 
(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) + // result: (VPMOVQDMasked256Merging dst x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPCMPEQD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVQD256 { + break } - break + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVQDMasked256Merging) + v.AddArg3(dst, x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPEQQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPEQQ512load {sym} [off] x ptr mem) + // match: (VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512Merging dst x mask) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPCMPEQQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVQW128_512 { + break } - break + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVQWMasked128_512Merging) + v.AddArg3(dst, x, mask) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPGTD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPGTD512load {sym} [off] x ptr mem) + // 
match: (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512Merging dst x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQB128_512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVSQBMasked128_512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) + // result: (VPMOVSQDMasked256Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQD256 { break } - v.reset(OpAMD64VPCMPGTD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVSQDMasked256Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPGTQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPGTQ512load {sym} [off] x ptr mem) + // match: (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512Merging dst x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQW128_512 { break } - v.reset(OpAMD64VPCMPGTQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVSQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - 
// cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512Merging dst x mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQB128_512 { break } - v.reset(OpAMD64VPCMPQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVUSQBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) + // result: (VPMOVUSQDMasked256Merging dst x mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPCMPQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVUSQDMasked256Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := 
v.Args[0] - // match: (VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) + // result: (VPMOVUSQWMasked128_512Merging dst x mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQW128_512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPCMPQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVUSQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) + // result: (VPMULLQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMULLQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPMULLQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) + // result: (VPOPCNTQMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTQ512 { 
break } - v.reset(OpAMD64VPCMPQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPOPCNTQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask) + // result: (VPORQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPORQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPORQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) + // result: (VPROLQMasked512Merging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPROLQ512 { break } - v.reset(OpAMD64VPCMPUD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPROLQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // 
match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) + // result: (VPROLVQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPROLVQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPROLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) + // result: (VPRORQMasked512Merging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPRORQ512 { break } - v.reset(OpAMD64VPCMPUDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPRORQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) + // result: (VPRORVQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPRORVQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPRORVQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 
[a] x y) mask) + // result: (VPSHLDQMasked512Merging dst [a] x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDQ512 { break } - v.reset(OpAMD64VPCMPUDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) + // result: (VPSHRDQMasked512Merging dst [a] x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSHRDQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) + // result: (VPSLLQMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSLLQ512const { break } - v.reset(OpAMD64VPCMPUDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return 
true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) + // result: (VPSLLVQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) + // result: (VPSRAQMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRAQ512const { break } - v.reset(OpAMD64VPCMPUQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) + // result: (VPSRAVQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVQ512 { 
break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSRAVQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512constMerging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ512const { break } - v.reset(OpAMD64VPCMPUQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) + // result: (VPSRLVQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPSRLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) + // result: (VPSUBQMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSUBQ512 { break } - v.reset(OpAMD64VPCMPUQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) 
- v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSUBQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) + // result: (VPXORQMasked512Merging dst x y mask) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPXORQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + v.reset(OpAMD64VPXORQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) + // result: (VRCP14PDMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VRCP14PD512 { break } - v.reset(OpAMD64VPCMPUQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VRCP14PDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPWSSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPWSSD512load {sym} [off] x y ptr mem) + // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) + // result: (VREDUCEPDMasked512Merging dst [a] x mask) for { - x := v_0 - y := v_1 - l := v_2 
- if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VREDUCEPD512 { break } - v.reset(OpAMD64VPDPWSSD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VREDUCEPDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPDPWSSDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) + // result: (VRNDSCALEPDMasked512Merging dst [a] x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VRNDSCALEPD512 { break } - v.reset(OpAMD64VPDPWSSDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VRNDSCALEPDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPDPWSSDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem) + // match: 
(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) + // result: (VRSQRT14PDMasked512Merging dst x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PD512 { break } - v.reset(OpAMD64VPDPWSSDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VRSQRT14PDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPDPWSSDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPDPWSSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPDPWSSDMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) + // result: (VSCALEFPDMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPD512 { break } - v.reset(OpAMD64VPDPWSSDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VSCALEFPDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMD512load {sym} [off] x ptr mem) + // match: (VPBLENDMQMasked512 dst 
(VSQRTPD512 x) mask) + // result: (VSQRTPDMasked512Merging dst x mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD512 { break } - v.reset(OpAMD64VPERMD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VSQRTPDMasked512Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) + // result: (VSUBPDMasked512Merging dst x y mask) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VSUBPD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VPERMDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VSUBPDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMDMasked512load {sym} [off] x ptr mask mem) + // result: (VPBLENDMQMasked512load {sym} [off] x ptr mask mem) for { 
x := v_0 l := v_1 @@ -53307,7 +56559,7 @@ func rewriteValueAMD64_OpAMD64VPERMDMasked512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPERMDMasked512load) + v.reset(OpAMD64VPBLENDMQMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -53315,10372 +56567,7571 @@ func rewriteValueAMD64_OpAMD64VPERMDMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPERMI2D128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2D128load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask) + // result: (VPABSWMasked512Merging dst x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPABSW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPABSWMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) + // result: (VPADDSWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPADDSW512 { break } - v.reset(OpAMD64VPERMI2D128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPADDSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2D256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2D256load {sym} [off] x y ptr mem) + 
// match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) + // result: (VPADDUSWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPADDUSW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPADDUSWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask) + // result: (VPADDWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPADDW512 { break } - v.reset(OpAMD64VPERMI2D256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPADDWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2D512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2D512load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) + // result: (VPAVGWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPAVGW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPAVGWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) + // result: (VPMADDUBSWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMADDUBSW512 { break } - v.reset(OpAMD64VPERMI2D512load) - 
v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMADDUBSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2DMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2DMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) + // result: (VPMADDWDMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMADDWD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMADDWDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) + // result: (VPMAXSWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMAXSW512 { break } - v.reset(OpAMD64VPERMI2DMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMAXSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2DMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2DMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) + // result: 
(VPMAXUWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMAXUWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) + // result: (VPMINSWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMINSW512 { break } - v.reset(OpAMD64VPERMI2DMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMINSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2DMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2DMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) + // result: (VPMINUWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMINUWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) + // result: (VPMOVSWBMasked256Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSWB256 { break } - 
v.reset(OpAMD64VPERMI2DMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVSWBMasked256Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PD128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PD128load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) + // result: (VPMOVUSWBMasked256Merging dst x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSWB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVUSWBMasked256Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) + // result: (VPMOVWBMasked256Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMOVWB256 { break } - v.reset(OpAMD64VPERMI2PD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMOVWBMasked256Merging) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PD256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PD256load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) + // result: (VPMULHUWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { - break - } - off := auxIntToInt32(l.AuxInt) 
- sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + dst := v_0 + if v_1.Op != OpAMD64VPMULHUW512 { break } - v.reset(OpAMD64VPERMI2PD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMULHUWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PD512load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) + // result: (VPMULHWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMULHW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMULHWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) + // result: (VPMULLWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMULLW512 { break } - v.reset(OpAMD64VPERMI2PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMULLWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPERMI2PDMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) + // result: (VPOPCNTWMasked512Merging dst x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPOPCNTWMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) + // result: (VPSHLDWMasked512Merging dst [a] x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDW512 { break } - v.reset(OpAMD64VPERMI2PDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) + // result: (VPSHRDWMasked512Merging dst [a] x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + 
v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) + // result: (VPSHUFHWMasked512Merging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFHW512 { break } - v.reset(OpAMD64VPERMI2PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHUFHWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPSHUFLW512 [a] x) mask) + // result: (VPSHUFLWMasked512Merging dst [a] x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFLW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHUFLWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) + // result: (VPSLLVWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVW512 { break } - v.reset(OpAMD64VPERMI2PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PS128(v *Value) bool { - 
v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PS128load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) + // result: (VPSLLWMasked512constMerging dst [a] x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLW512const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) + // result: (VPSRAVWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVW512 { break } - v.reset(OpAMD64VPERMI2PS128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PS256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PS256load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) + // result: (VPSRAWMasked512constMerging dst [a] x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAW512const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + 
mask := v_2 + v.reset(OpAMD64VPSRAWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) + // result: (VPSRLVWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVW512 { break } - v.reset(OpAMD64VPERMI2PS256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PS512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PS512load {sym} [off] x y ptr mem) + // match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512constMerging dst [a] x mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW512const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) + // result: (VPSUBSWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSUBSW512 { break } - v.reset(OpAMD64VPERMI2PS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSUBSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PSMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := 
v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) + // result: (VPSUBUSWMasked512Merging dst x y mask) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBUSW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSUBUSWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) + // result: (VPSUBWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSUBW512 { break } - v.reset(OpAMD64VPERMI2PSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSUBWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VPERMI2PSMasked256(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem) + b := v.Block + // match: (VPBLENDVB128 dst (VADDPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VADDPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := 
auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VADDPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2PSMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VADDPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VADDPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2PSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VADDPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2Q128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // 
result: (VPERMI2Q128load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2Q128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VBROADCASTSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2Q256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2Q256load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2Q256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VBROADCASTSDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + 
v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2Q512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2Q512load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2Q512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VBROADCASTSSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2QMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2QMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { 
+ x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2QMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VBROADCASTSSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2QMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2QMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSS512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2QMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VBROADCASTSSMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMI2QMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMI2QMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst 
(VCVTDQ2PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTDQ2PDMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTDQ2PD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMI2QMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTDQ2PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPD256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VCVTDQ2PS128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTDQ2PSMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTDQ2PS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VCVTDQ2PSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPD512(v *Value) bool { - v_1 := 
v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VCVTPD2PSX128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTPD2PSXMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTPD2PSX128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VCVTPD2PSXMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VCVTPS2PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTPS2PDMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTPS2PD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + 
v.reset(OpAMD64VCVTPS2PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VCVTQQ2PD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTQQ2PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTQQ2PD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VCVTQQ2PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPS512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPS512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VCVTQQ2PSX128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTQQ2PSXMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTQQ2PSX128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if 
!(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VCVTQQ2PSXMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPSMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPSMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VCVTTPD2DQX128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2DQXMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2DQX128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VCVTTPD2DQXMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMPSMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMPSMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VCVTTPD2QQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + 
// result: (VCVTTPD2QQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2QQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMPSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VCVTTPD2QQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VCVTTPD2UDQX128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2UDQXMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2UDQX128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VCVTTPD2UDQXMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // 
cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VCVTTPD2UQQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2UQQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2UQQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VCVTTPD2UQQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2DQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VCVTTPS2DQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) 
+ v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPERMQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPERMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPERMQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VCVTTPS2QQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2QQMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2QQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPERMQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VCVTTPS2QQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPINSRD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPINSRD128 [0] (Zero128 ) y) - // cond: y.Type.IsFloat() - // result: (VMOVSSf2v y) + // match: (VPBLENDVB128 dst (VCVTTPS2UDQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2UDQ128 { break } - y := v_1 - if !(y.Type.IsFloat()) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMOVSSf2v) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VCVTTPS2UDQMasked128Merging) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - // match: (VPINSRD128 [0] (Zero128 ) y) - // cond: !y.Type.IsFloat() - // result: (VMOVD y) + // match: (VPBLENDVB128 dst (VCVTTPS2UQQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2UQQMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2UQQ256 { break } - y := v_1 - if !(!y.Type.IsFloat()) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMOVD) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VCVTTPS2UQQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPINSRQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPINSRQ128 [0] (Zero128 ) y) - // cond: y.Type.IsFloat() - // result: (VMOVSDf2v y) + // match: (VPBLENDVB128 dst (VCVTUDQ2PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUDQ2PDMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUDQ2PD256 { break } - y := v_1 - if !(y.Type.IsFloat()) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMOVSDf2v) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VCVTUDQ2PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - // match: (VPINSRQ128 [0] (Zero128 ) y) - // cond: !y.Type.IsFloat() - // result: (VMOVQ y) + // match: (VPBLENDVB128 dst (VCVTUDQ2PS128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUDQ2PSMasked128Merging 
dst x (VPMOVVec32x4ToM mask)) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUDQ2PS128 { break } - y := v_1 - if !(!y.Type.IsFloat()) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMOVQ) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VCVTUDQ2PSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTD128load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VCVTUQQ2PD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUQQ2PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUQQ2PD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VCVTUQQ2PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTD256load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VCVTUQQ2PSX128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUQQ2PSXMasked128Merging dst x 
(VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUQQ2PSX128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VCVTUQQ2PSXMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPLZCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTD512load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VDIVPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VDIVPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPLZCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTDMasked128load 
{sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VDIVPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VDIVPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPLZCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTDMasked256load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VGF2P8MULB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VGF2P8MULBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, 
x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPLZCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTDMasked512load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VMAXPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VMAXPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPLZCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTQ128load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VMAXPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQ128load) - 
v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VMAXPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPLZCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTQ256load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VMINPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VMINPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VMINPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPLZCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTQ512load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VMINPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VMINPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := 
v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VMINPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTQMasked128load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VMULPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VMULPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VMULPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTQMasked256load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VMULPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM 
mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VMULPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VMULPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPLZCNTQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPLZCNTQMasked512load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPABSB128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPABSB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPABSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) 
&& clobber(l) - // result: (VPMAXSD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPABSD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPABSD128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSDMasked128load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPABSDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPABSQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPABSQ128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPABSW128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + 
if v_1.Op != OpAMD64VPABSW128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPACKSSDW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPACKSSDWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM 
mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPACKUSDW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPACKUSDWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPADDB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDB128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + 
return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPADDD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDD128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := 
auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDQ128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXSQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXSQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXSQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDSB128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUD512(v *Value) bool { - v_1 
:= v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDSW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUDMasked128load) - 
v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDUSB128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDUSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDUSW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDUSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPADDW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPADDW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPALIGNR128 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPALIGNRMasked128Merging dst [a] x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPALIGNR128 
{ + break } - break + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPALIGNRMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPAVGB128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPAVGBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: 
(VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPAVGW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPAVGWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTB128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTBMasked128Merging) + 
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTB256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMAXUQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMAXUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMAXUQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 
- l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMAXUQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTB512 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTBMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTD128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSDMasked128(v 
*Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTD256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if 
!(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTD512 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTQ128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSQ128 x l:(VMOVDQUload128 {sym} [off] 
ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTQ256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTQ512 { + break } - 
break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTW128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM 
mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTW256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPBROADCASTW512 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPBROADCASTWMasked512Merging) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINSQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINSQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINSQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPLZCNTD128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPLZCNTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) 
- sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPLZCNTQ128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPLZCNTQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMADDUBSW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMADDUBSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUDMasked256(v *Value) bool { - v_2 := v.Args[2] - 
v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMADDWD128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMADDWDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - 
continue - } - v.reset(OpAMD64VPMINUDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXSB128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXSD128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUQ256load {sym} [off] x 
ptr mem) + // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXSQ128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXSW128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXUB128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXUBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUDMasked128Merging 
dst x y (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXUD128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXUDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMINUQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMINUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMINUQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMINUQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMAXUQ128 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + 
v.reset(OpAMD64VPMAXUQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec16x16ToM (VPMOVMToVec16x16 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec16x16 { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUW128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXUWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec16x32ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec16x32ToM (VPMOVMToVec16x32 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINSB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec16x32 { + dst := v_0 + if v_1.Op != OpAMD64VPMINSB128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec16x8ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec16x8ToM (VPMOVMToVec16x8 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINSD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: 
(VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec16x8 { + dst := v_0 + if v_1.Op != OpAMD64VPMINSD128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINSDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec32x16ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec32x16ToM (VPMOVMToVec32x16 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec32x16 { + dst := v_0 + if v_1.Op != OpAMD64VPMINSQ128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINSQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec32x4ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec32x4ToM (VPMOVMToVec32x4 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec32x4 { + dst := v_0 + if v_1.Op != OpAMD64VPMINSW128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true 
} - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec32x8ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec32x8ToM (VPMOVMToVec32x8 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINUB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec32x8 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUB128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec64x2ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec64x2ToM (VPMOVMToVec64x2 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec64x2 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUD128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec64x4ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec64x4ToM (VPMOVMToVec64x4 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec64x4 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUQ128 { break } - x := v_0.Args[0] - 
v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec64x8ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec64x8ToM (VPMOVMToVec64x8 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMINUW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec64x8 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUW128 { break } - x := v_0.Args[0] - v.copyOf(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec8x16ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec8x16ToM (VPMOVMToVec8x16 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMOVDB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec8x16 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVDB128_128 { break } - x := v_0.Args[0] - v.copyOf(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVDBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec8x32ToM (VPMOVMToVec8x32 x)) - // result: x + // match: (VPBLENDVB128 
dst (VPMOVDW128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec8x32 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVDW128_128 { break } - x := v_0.Args[0] - v.copyOf(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVDWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool { - v_0 := v.Args[0] - // match: (VPMOVVec8x64ToM (VPMOVMToVec8x64 x)) - // result: x + // match: (VPBLENDVB128 dst (VPMOVQB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - if v_0.Op != OpAMD64VPMOVMToVec8x64 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVQB128_128 { break } - x := v_0.Args[0] - v.copyOf(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVQBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMOVQD128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := 
l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVQD128_128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLDMasked128load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVQDMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVQW128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - 
sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVQW128_128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLDMasked512load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVQWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSDB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off 
:= auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSDB128_128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLQ256load {sym} [off] x ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVSDBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSDW128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := 
l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSDW128_128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLQMasked128load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVSDWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSQB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := 
auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQB128_128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPMULLQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPMULLQMasked512load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPMULLQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVSQBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPOPCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTD128load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSQD128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQD128_128 { break } - off := 
auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVSQDMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPOPCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTD256load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSQW128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQW128_128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVSQWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPOPCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTD512load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSWB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSWBMasked128_128Merging dst x 
(VPMOVVec16x8ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSWB128_128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVSWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOPCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTDMasked128load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVSXBDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOPCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTDMasked256load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVSXBDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOPCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTDMasked512load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVSXBDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + 
v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPOPCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTQ128load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVSXBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPOPCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTQ256load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) 
+ v.reset(OpAMD64VPMOVSXBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPOPCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTQ512load {sym} [off] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVSXBQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOPCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTQMasked128load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := 
v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVSXBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOPCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTQMasked256load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVSXBWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOPCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPOPCNTQMasked512load {sym} [off] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - l := v_0 - if l.Op != 
OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXDQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVSXDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOR128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOR128 (VCMPPS128 [3] x x) (VCMPPS128 [3] y y)) - // result: (VCMPPS128 [3] x y) + // match: (VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_0.AuxInt) != 3 { - continue - } - x := v_0.Args[1] - if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_1.AuxInt) != 3 { - continue - } - y := v_1.Args[1] - if y != v_1.Args[0] { - continue - } - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXDQ256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - // match: (VPOR128 (VCMPPD128 [3] x x) (VCMPPD128 [3] y y)) - // result: (VCMPPD128 [3] x y) + // match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWDMasked128Merging dst 
x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_0.AuxInt) != 3 { - continue - } - x := v_0.Args[1] - if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_1.AuxInt) != 3 { - continue - } - y := v_1.Args[1] - if y != v_1.Args[0] { - continue - } - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXWD128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXWDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPOR256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPOR256 (VCMPPS256 [3] x x) (VCMPPS256 [3] y y)) - // result: (VCMPPS256 [3] x y) + // match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_0.AuxInt) != 3 { - continue - } - x := v_0.Args[1] - if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_1.AuxInt) != 3 { - continue - } - y := v_1.Args[1] - if y != v_1.Args[0] { - continue - } - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXWD256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXWDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - // match: (VPOR256 (VCMPPD256 [3] x x) (VCMPPD256 [3] y y)) - // result: 
(VCMPPD256 [3] x y) + // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_0.AuxInt) != 3 { - continue - } - x := v_0.Args[1] - if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_1.AuxInt) != 3 { - continue - } - y := v_1.Args[1] - if y != v_1.Args[0] { - continue - } - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, y) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXWQ128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXWQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPORD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (VPORD512 (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) (VPMOVMToVec32x16 (VCMPPS512 [3] y y))) - // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) + // match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPMOVMToVec32x16 { - continue - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_0_0.AuxInt) != 3 { - continue - } - x := v_0_0.Args[1] - if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec32x16 { - continue - } - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_1_0.AuxInt) != 3 { - continue - } - y := v_1_0.Args[1] - if y != v_1_0.Args[0] { - continue - } - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := 
b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXWQ256 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXWQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - // match: (VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) - // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) + // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64VPMOVMToVec64x8 { - continue - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_0_0.AuxInt) != 3 { - continue - } - x := v_0_0.Args[1] - if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec64x8 { - continue - } - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_1_0.AuxInt) != 3 { - continue - } - y := v_1_0.Args[1] - if y != v_1_0.Args[0] { - continue - } - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXWQ512 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVSXWQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - // match: (VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) 
mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDB128_128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSDBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPORDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDW128_128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPORDMasked256(v *Value) bool { 
- v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORDMasked256load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVUSDWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPORDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQB128_128 { + break } - break - } - return false 
-} -func rewriteValueAMD64_OpAMD64VPORQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORQ512load {sym} [off] x ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVUSQBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPORQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQD128_128 { + break } - break - } - return false -} 
-func rewriteValueAMD64_OpAMD64VPORQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORQMasked256load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VPMOVUSQDMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPORQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPORQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPORQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != 
OpAMD64VPMOVUSQW128_128 { + break } - break + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSQWMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSWB128_128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVUSWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if 
l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVWB128_128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVWBMasked128_128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVZXBD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVZXBDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVZXBDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLDMasked256load) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVZXBDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVZXBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l 
:= v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVZXBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVZXBQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VPROLQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVZXBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, 
mask, mem) + v.reset(OpAMD64VPMOVZXBWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXDQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVZXDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + 
dst := v_0 + if v_1.Op != OpAMD64VPMOVZXDQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVZXDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVD128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXWD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVZXWDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVD256load {sym} [off] x ptr mem) + // 
match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXWD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVZXWDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXWQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVZXWQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVDMasked128(v *Value) bool { - v_2 := v.Args[2] - 
v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXWQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVZXWQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXWQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVZXWQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMULHUW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMULHUWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMULHW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMULHW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - 
mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMULHWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMULLD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMULLD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMULLDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: 
(VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMULLQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMULLQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMULLW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMULLWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - 
v_0 := v.Args[0] - // match: (VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPOPCNTBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPROLVQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + 
v.reset(OpAMD64VPOPCNTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) - for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPOPCNTQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] 
- ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPOPCNTWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPROLD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPROLDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr 
mask mem) + // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPROLQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPROLQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPROLVD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPROLVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = 
symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPROLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPROLVQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPROLVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool { - v_0 := v.Args[0] - // match: (VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { 
+ dst := v_0 + if v_1.Op != OpAMD64VPRORD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPRORDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool { - v_0 := v.Args[0] - // match: (VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPRORQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPRORQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPRORQ512 [c] 
l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPRORVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPRORVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPRORVQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQMasked128load) 
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPRORVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPSHLDDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) + // 
cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPSHLDQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVD128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSHLDWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVD256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSHRDDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + 
a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSHRDQMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHRDWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVDMasked256load {sym} [off] x 
ptr mask mem) + // match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHUFBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHUFDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFHW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSHUFHWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPSHUFLW128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFLWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFLW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] 
+ mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSHUFLWMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLD128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSLLDMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // 
result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLQ128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSLLQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSLLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSLLVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSLLVWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLW128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSLLWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst 
(VPSRAD128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAD128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSRADMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAQ128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), 
off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRAQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRAVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // 
result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRAVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSRAVWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return 
true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAW128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSRAWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := 
l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSRLDMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRLQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - 
v_0 := v.Args[0] - // match: (VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, 
l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRLVQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVD128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVD128load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLVWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVD256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVD256load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: 
(VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW128const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVD512load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSUBBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v 
*Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VPSUBDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBQ128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + 
mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VPSUBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBSB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VPSUBSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVQ128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQ128load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) 
+ // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBSW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSUBSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVQ256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQ256load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBUSB128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSUBUSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVQ512(v *Value) bool { - v_2 := v.Args[2] - v_1 := 
v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQ512load {sym} [off] x y ptr mem) + // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBUSW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSUBUSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBW128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VPSHLDVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VPSUBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VRCP14PD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VRCP14PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VREDUCEPDMasked128Merging dst 
[a] x (VPMOVVec64x2ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VREDUCEPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VREDUCEPDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VREDUCEPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VREDUCEPSMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + 
v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VRNDSCALEPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VRNDSCALEPDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VRNDSCALEPS128 { break } - off := auxIntToInt32(l.AuxInt) - 
sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VRNDSCALEPSMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VRSQRT14PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VSCALEFPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VSCALEFPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && 
clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VSCALEFPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VSQRTPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VSQRTPS128 x) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VSQRTPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VSUBPD128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQ512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VSUBPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) 
return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VSUBPS128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VSUBPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + b := v.Block + // match: (VPBLENDVB256 dst (VADDPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != 
OpAMD64VADDPD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VADDPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) + // match: (VPBLENDVB256 dst (VADDPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VADDPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VADDPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVD128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := 
v.Args[0] - // match: (VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVD128load {sym} [off] x y ptr mem) + // match: (VPBLENDVB256 dst (VCVTDQ2PD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTDQ2PDMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTDQ2PD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VCVTDQ2PDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVD256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVD256load {sym} [off] x y ptr mem) + // match: (VPBLENDVB256 dst (VCVTDQ2PS256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTDQ2PSMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTDQ2PS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + 
v.reset(OpAMD64VCVTDQ2PSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVD512load {sym} [off] x y ptr mem) + // match: (VPBLENDVB256 dst (VCVTPD2PSY128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTPD2PSYMasked128Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTPD2PSY128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VCVTPD2PSYMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTPS2PD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTPS2PDMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTPS2PD512 { break } - off := auxIntToInt32(l.AuxInt) - sym 
:= auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTPS2PDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTQQ2PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTQQ2PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTQQ2PD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTQQ2PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // 
result: (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTQQ2PSY128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTQQ2PSYMasked128Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTQQ2PSY128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTQQ2PSYMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVQ128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQ128load {sym} [off] x y ptr mem) + // match: (VPBLENDVB256 dst (VCVTTPD2DQY128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2DQYMasked128Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2DQY128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VCVTTPD2DQYMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) 
+ v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVQ256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQ256load {sym} [off] x y ptr mem) + // match: (VPBLENDVB256 dst (VCVTTPD2QQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2QQMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2QQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VCVTTPD2QQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVQ512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQ512load {sym} [off] x y ptr mem) + // match: (VPBLENDVB256 dst (VCVTTPD2UDQY128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2UDQYMasked128Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2UDQY128 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VCVTTPD2UDQYMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTTPD2UQQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPD2UQQMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPD2UQQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTTPD2UQQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2DQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTTPS2DQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTTPS2QQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2QQMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2QQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_3 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.reset(OpAMD64VCVTTPS2QQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } 
- return false -} -func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VCVTTPS2UDQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2UDQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VCVTTPS2UDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTTPS2UQQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2UQQMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2UQQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + 
mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VCVTTPS2UQQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VCVTUDQ2PD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUDQ2PDMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUDQ2PD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VCVTUDQ2PDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst 
(VCVTUDQ2PS256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUDQ2PSMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VCVTUDQ2PS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFDMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VCVTUDQ2PSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLD128 x (MOVQconst [c])) - // result: (VPSLLD128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VCVTUQQ2PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUQQ2PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VCVTUQQ2PD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLD256 x (MOVQconst [c])) - // result: (VPSLLD256const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VCVTUQQ2PDMasked256Merging) + v0 
:= b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLD512 x (MOVQconst [c])) - // result: (VPSLLD512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VCVTUQQ2PSY128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTUQQ2PSYMasked128Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VCVTUQQ2PSY128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTUQQ2PSYMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VDIVPD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLD512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - 
v.AddArg2(ptr, mem) + v.reset(OpAMD64VDIVPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VDIVPS256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VDIVPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VGF2P8MULB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := 
v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLDMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VGF2P8MULBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VMAXPD256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMAXPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VMAXPS256 { break } - 
off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLDMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VMAXPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VMINPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VMINPD256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMINPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VMINPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: 
(VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VMINPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLDMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VMINPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQ128 x (MOVQconst [c])) - // result: (VPSLLQ128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VMULPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VMULPD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQ256 x (MOVQconst [c])) - // result: (VPSLLQ256const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VMULPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + 
v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQ512 x (MOVQconst [c])) - // result: (VPSLLQ512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VMULPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VMULPS256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMULPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VPABSB256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPABSB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQ512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPABSBMasked256Merging) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPABSD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPABSD256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPABSQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPABSQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQMasked128constload) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPABSQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPABSW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPABSW256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPACKSSDW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && 
clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPACKSSDWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPACKUSDW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPACKUSDWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPADDB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != 
OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPADDB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPADDBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPADDD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPADDD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPADDDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // 
cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPADDQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPADDQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPADDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPADDSB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPADDSB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPADDSBMasked256Merging) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPADDSW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPADDSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPADDUSB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { 
+ y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPADDUSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPADDUSW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPADDUSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPADDW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // 
result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPADDW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPADDWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLVQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPALIGNR256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPALIGNRMasked256Merging dst [a] x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPALIGNR256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPALIGNRMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLW128 x (MOVQconst [c])) - // result: (VPSLLW128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPAVGB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPAVGB256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLW256 x (MOVQconst [c])) - // result: (VPSLLW256const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VPAVGBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLW512 x (MOVQconst [c])) - // result: (VPSLLW512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPAVGW256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPAVGWMasked256Merging) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPLZCNTD256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPLZCNTDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPLZCNTQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPLZCNTQ256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPLZCNTQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMADDUBSW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMADDUBSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAD128 x (MOVQconst [c])) - // result: (VPSRAD128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMADDWD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMADDWDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAD256 x (MOVQconst [c])) - // result: 
(VPSRAD256const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMAXSB256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAD512 x (MOVQconst [c])) - // result: (VPSRAD512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMAXSD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMAXSQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAD512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMAXSQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked128 x (MOVQconst [c]) mask) - // result: (VPSRADMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMAXSW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRADMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMAXUBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked256 x (MOVQconst [c]) mask) - // result: (VPSRADMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUD256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXUDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false 
-} -func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRADMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMAXUQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked512 x (MOVQconst [c]) mask) - // result: (VPSRADMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMAXUW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMAXUWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMINSB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMINSB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRADMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMINSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQ128 x (MOVQconst [c])) - // result: (VPSRAQ128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMINSD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMINSD256 { break } - c := 
auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMINSQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQ128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMINSQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQ256 x (MOVQconst [c])) - // result: (VPSRAQ256const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != 
OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMINSW256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VPMINUB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQ256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMINUBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQ512 x (MOVQconst [c])) - // result: (VPSRAQ512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMINUD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: 
(VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMINUD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMINUQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQ512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMINUQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked128const [uint8(c)] x mask) 
+ // match: (VPBLENDVB256 dst (VPMINUW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMINUW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMINUWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVDB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVDB128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVDBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMOVDW128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVDW128_256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVDWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVQB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVQB128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + 
v.reset(OpAMD64VPMOVQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMOVQD128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVQD128_256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVQDMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVQW128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVQW128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMOVQWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPMOVSDB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSDB128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVSDBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVSDW128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) for { 
- x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSDW128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVSDWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVSQB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQB128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVSQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} 
[off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVSQD128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQD128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVSQDMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQ128load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPMOVSQW128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSQW128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQ128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVSQWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQ256load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPMOVSWB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSWB128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQ256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVSWBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXBW512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VPSRAVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPMOVSXBWMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVSXDQ512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVSXDQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := 
v_0 + if v_1.Op != OpAMD64VPMOVSXWD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVSXWDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDB128_256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPMOVUSDBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAW128 x (MOVQconst [c])) - // result: (VPSRAW128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) + // 
cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSDW128_256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAW128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSDWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAW256 x (MOVQconst [c])) - // result: (VPSRAW256const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQB128_256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAW512 x (MOVQconst [c])) - // result: (VPSRAW512const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAW512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VPMOVUSQBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := 
v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQD128_256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSQDMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSQW128_256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSQWMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked512const 
[uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVUSWB128_256 { break } - c := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVUSWBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLD128 x (MOVQconst [c])) - // result: (VPSRLD128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVWB128_256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVWBMasked128_256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLD256 x (MOVQconst [c])) - // result: (VPSRLD256const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) for { - x := v_0 - if v_1.Op != 
OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXBW512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVZXBWMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLD512 x (MOVQconst [c])) - // result: (VPSRLD512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXDQ512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMOVZXDQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPMOVZXWD512 { break } - off := auxIntToInt32(l.AuxInt) - sym := 
auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLD512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPMOVZXWDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLDMasked128 x (MOVQconst [c]) mask) - // result: (VPSRLDMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMULHUW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMULHUWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPMULHW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - c := 
auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPMULHW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLDMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMULHWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLDMasked256 x (MOVQconst [c]) mask) - // result: (VPSRLDMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMULLD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMULLD256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMULLDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] 
ptr mask mem) + // match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPMULLQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLDMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPMULLQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLDMasked512 x (MOVQconst [c]) mask) - // result: (VPSRLDMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPMULLW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPMULLW256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPMULLWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // 
match: (VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLDMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPOPCNTBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQ128 x (MOVQconst [c])) - // result: (VPSRLQ128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQ256 x (MOVQconst [c])) - // result: (VPSRLQ256const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTQ256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQ512 x (MOVQconst [c])) - // result: (VPSRLQ512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTW256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPOPCNTWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { - v_0 := v.Args[0] - // match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr 
mem) + // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPROLD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLQ512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VPROLDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQMasked128 x (MOVQconst [c]) mask) - // result: (VPSRLQMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPROLQ256 { break } - c := auxIntToInt64(v_1.AuxInt) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPROLQMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool 
{ - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPROLVD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPROLVD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLQMasked128constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPROLVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQMasked256 x (MOVQconst [c]) mask) - // result: (VPSRLQMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPROLVQ256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPROLVQMasked256Merging) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPRORD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLQMasked256constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPRORDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQMasked512 x (MOVQconst [c]) mask) - // result: (VPSRLQMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPRORQ256 { break } - c := auxIntToInt64(v_1.AuxInt) + a := 
auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPRORQMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // match: (VPBLENDVB256 dst (VPRORVD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPRORVD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLQMasked512constload) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VPRORVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPRORVQ256 x y) 
mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPRORVQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPRORVQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHLDDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, 
v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHLDQMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := 
auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHLDWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSHRDDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQMasked128load {sym} [off] x ptr 
mask mem) + // match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHRDQMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + 
v.reset(OpAMD64VPSHRDWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSHUFBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLW128 x (MOVQconst [c])) - // result: (VPSRLW128const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFD256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLW128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + a := auxIntToUint8(v_1.AuxInt) + 
x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSHUFDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLW256 x (MOVQconst [c])) - // result: (VPSRLW256const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFHW256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSHUFHWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLW512 x (MOVQconst [c])) - // result: (VPSRLW512const [uint8(c)] x) + // match: (VPBLENDVB256 dst (VPSHUFLW256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSHUFLWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFLW256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRLW512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + 
v.reset(OpAMD64VPSHUFLWMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLWMasked128 x (MOVQconst [c]) mask) - // result: (VPSRLWMasked128const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPSLLD256const { break } - c := auxIntToInt64(v_1.AuxInt) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSLLDMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLWMasked256 x (MOVQconst [c]) mask) - // result: (VPSRLWMasked256const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPSLLQ256const { break } - c := auxIntToInt64(v_1.AuxInt) + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } 
+ v.reset(OpAMD64VPSLLQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRLWMasked512 x (MOVQconst [c]) mask) - // result: (VPSRLWMasked512const [uint8(c)] x mask) + // match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVD256 { break } - c := auxIntToInt64(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSLLVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBD512load) - 
v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSLLVQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSLLVW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSLLVWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + 
if v_1.Op != OpAMD64VPSLLW256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSLLWMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAD256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRADMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBQ512 x 
l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAQ256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSRAQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - 
v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRAVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRAVQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVW256 { break } - off := 
auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] + y := v_1.Args[1] + x := v_1.Args[0] mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.reset(OpAMD64VPSRAVWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) + // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRAW256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPTERNLOGD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRAWMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // 
match: (VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) + // match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLD256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPTERNLOGD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLDMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) + // match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLQ256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) 
&& clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPTERNLOGD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLQMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) + // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPTERNLOGQ128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLVDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) 
&& clobber(l) - // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) + // match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPTERNLOGQ256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLVQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) + // match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - c := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - l := v_2 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLVW256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPTERNLOGQ512load) - v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.reset(OpAMD64VPSRLVWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKHDQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSRLW256const { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPUNPCKHDQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSRLWMasked256constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKHQDQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 
+ if v_1.Op != OpAMD64VPSUBB256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPUNPCKHQDQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSUBBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKLDQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSUBD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPUNPCKLDQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSUBDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKLQDQ512load {sym} [off] x ptr mem) + // match: 
(VPBLENDVB256 dst (VPSUBQ256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VPSUBQ256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPUNPCKLQDQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.reset(OpAMD64VPSUBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPXORD512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORD512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPSUBSB256 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSUBSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + 
v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPXORDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORDMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPSUBSW256 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSUBSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPXORDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORDMasked256load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := 
auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPSUBUSB256 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSUBUSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPXORDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORDMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPSUBUSW256 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSUBUSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func 
rewriteValueAMD64_OpAMD64VPXORQ512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQ512load {sym} [off] x ptr mem) + // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VPSUBW256 { + break } - break + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSUBWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPXORQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQMasked128load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } 
- v.reset(OpAMD64VPXORQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VRCP14PD256 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPXORQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQMasked256load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break } - break + v.reset(OpAMD64VRCP14PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VPXORQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQMasked512load {sym} [off] x ptr mask mem) + // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask 
:= v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + dst := v_0 + if v_1.Op != OpAMD64VREDUCEPD256 { + break } - break + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VREDUCEPDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PD128(v *Value) bool { - v_0 := v.Args[0] - // match: (VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PD128load {sym} [off] ptr mem) + // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VREDUCEPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PD128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VREDUCEPSMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PD256(v *Value) bool { - v_0 := v.Args[0] - // match: (VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PD256load {sym} [off] ptr mem) + // match: 
(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VRNDSCALEPD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PD256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VRNDSCALEPDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PD512load {sym} [off] ptr mem) + // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VRNDSCALEPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VRNDSCALEPSMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true 
} - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PDMasked128load {sym} [off] ptr mask mem) + // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VRSQRT14PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PDMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PDMasked256load {sym} [off] ptr mask mem) + // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - 
v.reset(OpAMD64VRCP14PDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VSCALEFPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PDMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PDMasked512load {sym} [off] ptr mask mem) + // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VSCALEFPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VSCALEFPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PS512(v *Value) bool { - v_0 := v.Args[0] - // match: (VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PS512load {sym} [off] ptr mem) + // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD256 { break } - off := 
auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PS512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VSQRTPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PSMasked128load {sym} [off] ptr mask mem) + // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PSMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VSQRTPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PSMasked256load {sym} [off] ptr mask mem) + // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask) + // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + dst := v_0 + if v_1.Op != OpAMD64VSUBPD256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PSMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VSUBPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PSMasked512load {sym} [off] ptr mask mem) + // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + dst := v_0 + if v_1.Op != OpAMD64VSUBPS256 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRCP14PSMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.reset(OpAMD64VSUBPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool { 
+func rewriteValueAMD64_OpAMD64VPBROADCASTB128(v *Value) bool { v_0 := v.Args[0] - // match: (VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + b := v.Block + // match: (VPBROADCASTB128 x:(VPINSRB128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTB128 (VMOVQ y)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { break } - v.reset(OpAMD64VREDUCEPD128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTB128) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPBROADCASTB256(v *Value) bool { v_0 := v.Args[0] - // match: (VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + b := v.Block + // match: (VPBROADCASTB256 x:(VPINSRB128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTB256 (VMOVQ y)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { break 
} - v.reset(OpAMD64VREDUCEPD256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTB256) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPBROADCASTB512(v *Value) bool { v_0 := v.Args[0] - // match: (VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + b := v.Block + // match: (VPBROADCASTB512 x:(VPINSRB128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTB512 (VMOVQ y)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + x := v_0 + if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { break } - v.reset(OpAMD64VREDUCEPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTB512) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPBROADCASTW128(v *Value) bool { v_0 := v.Args[0] - // match: (VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + b := v.Block + // match: (VPBROADCASTW128 x:(VPINSRW128 [0] (Zero128 ) y)) 
+ // cond: x.Uses == 1 + // result: (VPBROADCASTW128 (VMOVQ y)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { break } - v.reset(OpAMD64VREDUCEPDMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTW128) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPBROADCASTW256(v *Value) bool { v_0 := v.Args[0] - // match: (VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + b := v.Block + // match: (VPBROADCASTW256 x:(VPINSRW128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTW256 (VMOVQ y)) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { break } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { break } - v.reset(OpAMD64VREDUCEPDMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTW256) + v0 := b.NewValue0(v.Pos, 
OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPBROADCASTW512(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTW512 x:(VPINSRW128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTW512 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTW512) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -63688,26 +64139,28 @@ func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPDMasked512load) + v.reset(OpAMD64VPCMPD512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPS128 [c] 
l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -63715,25 +64168,29 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPS128load) + v.reset(OpAMD64VPCMPDMasked128load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -63741,25 +64198,29 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPS256load) + v.reset(OpAMD64VPCMPDMasked256load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return 
false } -func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -63767,82 +64228,142 @@ func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPS512load) + v.reset(OpAMD64VPCMPDMasked512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPEQD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPEQD512load {sym} [off] x ptr mem) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + 
v.reset(OpAMD64VPCMPEQD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPSMasked128load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.reset(OpAMD64VPCMPGTD512load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPGTQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) 
- // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPGTQ512load {sym} [off] x ptr mem) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPSMasked256load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.reset(OpAMD64VPCMPGTQ512load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -63850,26 +64371,28 @@ func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VREDUCEPSMasked512load) + v.reset(OpAMD64VPCMPQ512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := 
v.Args[0] - // match: (VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -63877,25 +64400,29 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPD128load) + v.reset(OpAMD64VPCMPQMasked128load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -63903,25 +64430,29 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPD256load) + v.reset(OpAMD64VPCMPQMasked256load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, 
mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -63929,136 +64460,147 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPD512load) + v.reset(OpAMD64VPCMPQMasked512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPUD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && 
clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPDMasked128load) + v.reset(OpAMD64VPCMPUD512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPDMasked256load) + v.reset(OpAMD64VPCMPUDMasked128load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr 
mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPDMasked512load) + v.reset(OpAMD64VPCMPUDMasked256load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPS128load) + v.reset(OpAMD64VPCMPUDMasked512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: 
canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) @@ -64068,105 +64610,115 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPS256load) + v.reset(OpAMD64VPCMPUQ512load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) + // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPS512load) + v.reset(OpAMD64VPCMPUQMasked128load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool { + v_2 
:= v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPSMasked128load) + v.reset(OpAMD64VPCMPUQMasked256load) v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { c := auxIntToUint8(v.AuxInt) - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPSMasked256load) + v.reset(OpAMD64VPCMPUQMasked512load) 
v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPDPWSSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) + // result: (VPDPWSSD512load {sym} [off] x y ptr mem) for { - c := auxIntToUint8(v.AuxInt) - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64174,25 +64726,29 @@ func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRNDSCALEPSMasked512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.reset(OpAMD64VPDPWSSD512load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPDPWSSDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PD128load {sym} [off] ptr mem) + // result: (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64200,24 +64756,30 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PD128(v 
*Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PD128load) + v.reset(OpAMD64VPDPWSSDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPDPWSSDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PD256load {sym} [off] ptr mem) + // result: (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64225,24 +64787,30 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PD256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PD256load) + v.reset(OpAMD64VPDPWSSDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PD512(v *Value) bool { - v_0 := v.Args[0] - // match: (VRSQRT14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) +func rewriteValueAMD64_OpAMD64VPDPWSSDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PD512load {sym} [off] ptr mem) + // result: (VPDPWSSDMasked512load {sym} [off] x y ptr mask mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ 
-64250,52 +64818,55 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PD512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PD512load) + v.reset(OpAMD64VPDPWSSDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PDMasked128load {sym} [off] ptr mask mem) + // result: (VPERMD512load {sym} [off] x ptr mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PDMasked128load) + v.reset(OpAMD64VPERMD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PDMasked256load {sym} [off] ptr mask mem) + // result: (VPERMDMasked256load {sym} [off] x ptr mask mem) for { - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64303,52 +64874,28 @@ func 
rewriteValueAMD64_OpAMD64VRSQRT14PDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PDMasked256load) + v.reset(OpAMD64VPERMDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PDMasked512load {sym} [off] ptr mask mem) - for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { - break - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_1 - if !(canMergeLoad(v, l) && clobber(l)) { - break - } - v.reset(OpAMD64VRSQRT14PDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VRSQRT14PS512(v *Value) bool { - v_0 := v.Args[0] - // match: (VRSQRT14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PS512load {sym} [off] ptr mem) + // result: (VPERMDMasked512load {sym} [off] x ptr mask mem) for { - l := v_0 + x := v_0 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64356,25 +64903,29 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PS512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PS512load) + v.reset(OpAMD64VPERMDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) 
+ v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2D128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PSMasked128load {sym} [off] ptr mask mem) + // result: (VPERMI2D128load {sym} [off] x y ptr mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64382,26 +64933,28 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PSMasked128load) + v.reset(OpAMD64VPERMI2D128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2D256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PSMasked256load {sym} [off] ptr mask mem) + // result: (VPERMI2D256load {sym} [off] x y ptr mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64409,26 +64962,28 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PSMasked256load) + v.reset(OpAMD64VPERMI2D256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - 
v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2D512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRSQRT14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRSQRT14PSMasked512load {sym} [off] ptr mask mem) + // result: (VPERMI2D512load {sym} [off] x y ptr mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64436,27 +64991,29 @@ func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRSQRT14PSMasked512load) + v.reset(OpAMD64VPERMI2D512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2DMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPD128load {sym} [off] x ptr mem) + // result: (VPERMI2DMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64464,26 +65021,30 @@ func rewriteValueAMD64_OpAMD64VSCALEFPD128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPD128load) + v.reset(OpAMD64VPERMI2DMasked128load) v.AuxInt = 
int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2DMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPD256load {sym} [off] x ptr mem) + // result: (VPERMI2DMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64491,26 +65052,30 @@ func rewriteValueAMD64_OpAMD64VSCALEFPD256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPD256load) + v.reset(OpAMD64VPERMI2DMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2DMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPD512load {sym} [off] x ptr mem) + // result: (VPERMI2DMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64518,27 +65083,29 @@ func rewriteValueAMD64_OpAMD64VSCALEFPD512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPD512load) + 
v.reset(OpAMD64VPERMI2DMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PD128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPDMasked128load {sym} [off] x ptr mask mem) + // result: (VPERMI2PD128load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64546,28 +65113,28 @@ func rewriteValueAMD64_OpAMD64VSCALEFPDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPDMasked128load) + v.reset(OpAMD64VPERMI2PD128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PD256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPDMasked256load {sym} [off] x ptr mask mem) + // result: (VPERMI2PD256load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64575,28 +65142,28 @@ func rewriteValueAMD64_OpAMD64VSCALEFPDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - 
v.reset(OpAMD64VSCALEFPDMasked256load) + v.reset(OpAMD64VPERMI2PD256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PD512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPDMasked512load {sym} [off] x ptr mask mem) + // result: (VPERMI2PD512load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64604,27 +65171,29 @@ func rewriteValueAMD64_OpAMD64VSCALEFPDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPDMasked512load) + v.reset(OpAMD64VPERMI2PD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPS128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPS128load {sym} [off] x ptr mem) + // result: (VPERMI2PDMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64632,26 +65201,30 @@ func rewriteValueAMD64_OpAMD64VSCALEFPS128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if 
!(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPS128load) + v.reset(OpAMD64VPERMI2PDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPS256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPS256load {sym} [off] x ptr mem) + // result: (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64659,26 +65232,30 @@ func rewriteValueAMD64_OpAMD64VSCALEFPS256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPS256load) + v.reset(OpAMD64VPERMI2PDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPS512load {sym} [off] x ptr mem) + // result: (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64686,27 +65263,29 @@ func rewriteValueAMD64_OpAMD64VSCALEFPS512(v *Value) bool { sym := auxToSym(l.Aux) 
mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPS512load) + v.reset(OpAMD64VPERMI2PDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PS128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPSMasked128load {sym} [off] x ptr mask mem) + // result: (VPERMI2PS128load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64714,28 +65293,28 @@ func rewriteValueAMD64_OpAMD64VSCALEFPSMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPSMasked128load) + v.reset(OpAMD64VPERMI2PS128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPSMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PS256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPSMasked256load {sym} [off] x ptr mask mem) + // result: (VPERMI2PS256load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64743,28 +65322,28 @@ func rewriteValueAMD64_OpAMD64VSCALEFPSMasked256(v *Value) bool { 
sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPSMasked256load) + v.reset(OpAMD64VPERMI2PS256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSCALEFPSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PS512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSCALEFPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSCALEFPSMasked512load {sym} [off] x ptr mask mem) + // result: (VPERMI2PS512load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64772,81 +65351,91 @@ func rewriteValueAMD64_OpAMD64VSCALEFPSMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSCALEFPSMasked512load) + v.reset(OpAMD64VPERMI2PS512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSHUFPD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // result: (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem) for { - c := auxIntToUint8(v.AuxInt) x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + y := v_1 + l := 
v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSHUFPD512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.reset(OpAMD64VPERMI2PSMasked128load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSHUFPS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) + // result: (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem) for { - c := auxIntToUint8(v.AuxInt) x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSHUFPS512load) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.reset(OpAMD64VPERMI2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VSQRTPD512load {sym} [off] ptr mem) + // result: (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64854,25 +65443,29 @@ func rewriteValueAMD64_OpAMD64VSQRTPD512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPD512load) + v.reset(OpAMD64VPERMI2PSMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2Q128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSQRTPDMasked128load {sym} [off] ptr mask mem) + // result: (VPERMI2Q128load {sym} [off] x y ptr mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -64880,26 +65473,28 @@ func rewriteValueAMD64_OpAMD64VSQRTPDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPDMasked128load) + v.reset(OpAMD64VPERMI2Q128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2Q256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // 
result: (VSQRTPDMasked256load {sym} [off] ptr mask mem) + // result: (VPERMI2Q256load {sym} [off] x y ptr mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -64907,26 +65502,28 @@ func rewriteValueAMD64_OpAMD64VSQRTPDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPDMasked256load) + v.reset(OpAMD64VPERMI2Q256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2Q512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSQRTPDMasked512load {sym} [off] ptr mask mem) + // result: (VPERMI2Q512load {sym} [off] x y ptr mem) for { - l := v_0 + x := v_0 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -64934,130 +65531,143 @@ func rewriteValueAMD64_OpAMD64VSQRTPDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPDMasked512load) + v.reset(OpAMD64VPERMI2Q512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2QMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VSQRTPS512load {sym} [off] ptr mem) + // result: (VPERMI2QMasked128load {sym} [off] x y ptr mask mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPS512load) + v.reset(OpAMD64VPERMI2QMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2QMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSQRTPSMasked128load {sym} [off] ptr mask mem) + // result: (VPERMI2QMasked256load {sym} [off] x y ptr mask mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPSMasked128load) + v.reset(OpAMD64VPERMI2QMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPSMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMI2QMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMI2QMasked512 x y l:(VMOVDQUload512 
{sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSQRTPSMasked256load {sym} [off] ptr mask mem) + // result: (VPERMI2QMasked512load {sym} [off] x y ptr mask mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPSMasked256load) + v.reset(OpAMD64VPERMI2QMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSQRTPSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMPD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSQRTPSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSQRTPSMasked512load {sym} [off] ptr mask mem) + // result: (VPERMPD256load {sym} [off] x ptr mem) for { - l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSQRTPSMasked512load) + v.reset(OpAMD64VPERMPD256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSUBPD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMPD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPD512load 
{sym} [off] x ptr mem) + // result: (VPERMPD512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -65071,7 +65681,7 @@ func rewriteValueAMD64_OpAMD64VSUBPD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPD512load) + v.reset(OpAMD64VPERMPD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -65079,17 +65689,17 @@ func rewriteValueAMD64_OpAMD64VSUBPD512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VSUBPDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMPDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPDMasked128load {sym} [off] x ptr mask mem) + // result: (VPERMPDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) @@ -65100,7 +65710,7 @@ func rewriteValueAMD64_OpAMD64VSUBPDMasked128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPDMasked128load) + v.reset(OpAMD64VPERMPDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -65108,17 +65718,17 @@ func rewriteValueAMD64_OpAMD64VSUBPDMasked128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VSUBPDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMPDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPDMasked256load {sym} [off] x ptr mask mem) + // result: (VPERMPDMasked512load {sym} [off] x ptr 
mask mem) for { x := v_0 l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) @@ -65129,7 +65739,7 @@ func rewriteValueAMD64_OpAMD64VSUBPDMasked256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPDMasked256load) + v.reset(OpAMD64VPERMPDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -65137,13 +65747,12 @@ func rewriteValueAMD64_OpAMD64VSUBPDMasked256(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VSUBPDMasked512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPERMPS512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPDMasked512load {sym} [off] x ptr mask mem) + // result: (VPERMPS512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -65154,56 +65763,57 @@ func rewriteValueAMD64_OpAMD64VSUBPDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPDMasked512load) + v.reset(OpAMD64VPERMPS512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSUBPS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMPSMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPS512load {sym} [off] x ptr mem) + // result: (VPERMPSMasked256load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + 
if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPS512load) + v.reset(OpAMD64VPERMPSMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSUBPSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMPSMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPSMasked128load {sym} [off] x ptr mask mem) + // result: (VPERMPSMasked512load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) @@ -65214,7 +65824,7 @@ func rewriteValueAMD64_OpAMD64VSUBPSMasked128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPSMasked128load) + v.reset(OpAMD64VPERMPSMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -65222,13 +65832,12 @@ func rewriteValueAMD64_OpAMD64VSUBPSMasked128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VSUBPSMasked256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPERMQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPSMasked256load {sym} [off] x ptr mask mem) + // result: (VPERMQ256load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -65239,25 +65848,23 @@ func 
rewriteValueAMD64_OpAMD64VSUBPSMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPSMasked256load) + v.reset(OpAMD64VPERMQ256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VSUBPSMasked512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPERMQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VSUBPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VSUBPSMasked512load {sym} [off] x ptr mask mem) + // result: (VPERMQ512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -65268,680 +65875,608 @@ func rewriteValueAMD64_OpAMD64VSUBPSMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VSUBPSMasked512load) + v.reset(OpAMD64VPERMQ512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XADDLlock [off1+off2] {sym} val ptr mem) + // match: (VPERMQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMQMasked256load {sym} [off] x ptr mask mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } 
- off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XADDLlock) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64VPERMQMasked256load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64XADDQlock(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPERMQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XADDQlock [off1+off2] {sym} val ptr mem) + // match: (VPERMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMQMasked512load {sym} [off] x ptr mask mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XADDQlock) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64VPERMQMasked512load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64XCHGL(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPINSRD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: 
is32Bit(int64(off1)+int64(off2)) - // result: (XCHGL [off1+off2] {sym} val ptr mem) + // match: (VPINSRD128 [0] (Zero128 ) y) + // cond: y.Type.IsFloat() + // result: (VMOVSSf2v y) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { break } - off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + y := v_1 + if !(y.Type.IsFloat()) { break } - v.reset(OpAMD64XCHGL) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + v.reset(OpAMD64VMOVSSf2v) + v.Type = types.TypeVec128 + v.AddArg(y) return true } - // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB - // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) + // match: (VPINSRD128 [0] (Zero128 ) y) + // cond: !y.Type.IsFloat() + // result: (VMOVD y) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) { + y := v_1 + if !(!y.Type.IsFloat()) { break } - v.reset(OpAMD64XCHGL) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, ptr, mem) + v.reset(OpAMD64VMOVD) + v.Type = types.TypeVec128 + v.AddArg(y) return true } return false } -func rewriteValueAMD64_OpAMD64XCHGQ(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPINSRQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XCHGQ [off1+off2] {sym} val 
ptr mem) + // match: (VPINSRQ128 [0] (Zero128 ) y) + // cond: y.Type.IsFloat() + // result: (VMOVSDf2v y) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { break } - off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + y := v_1 + if !(y.Type.IsFloat()) { break } - v.reset(OpAMD64XCHGQ) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + v.reset(OpAMD64VMOVSDf2v) + v.Type = types.TypeVec128 + v.AddArg(y) return true } - // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB - // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) + // match: (VPINSRQ128 [0] (Zero128 ) y) + // cond: !y.Type.IsFloat() + // result: (VMOVQ y) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { break } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) { + y := v_1 + if !(!y.Type.IsFloat()) { break } - v.reset(OpAMD64XCHGQ) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, ptr, mem) + v.reset(OpAMD64VMOVQ) + v.Type = types.TypeVec128 + v.AddArg(y) return true } return false } -func rewriteValueAMD64_OpAMD64XORL(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPLZCNTD128(v *Value) bool { v_0 := v.Args[0] - // match: (XORL (SHLL (MOVLconst [1]) y) x) - // result: (BTCL x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLL { - continue - } - y := v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != 
OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { - continue - } - x := v_1 - v.reset(OpAMD64BTCL) - v.AddArg2(x, y) - return true - } - break - } - // match: (XORL x (MOVLconst [c])) - // result: (XORLconst [c] x) + // match: (VPLZCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTD128load {sym} [off] ptr mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break } - break - } - // match: (XORL x x) - // result: (MOVLconst [0]) - for { - x := v_0 - if x != v_1 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + v.reset(OpAMD64VPLZCNTD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (XORL x l:(MOVLload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (XORLload x [off] {sym} ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVLload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(OpAMD64XORLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true - } - break - } - // match: (XORL x (ADDLconst [-1] x)) - // cond: buildcfg.GOAMD64 >= 3 - // result: (BLSMSKL x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { 
- continue - } - v.reset(OpAMD64BLSMSKL) - v.AddArg(x) - return true - } - break - } return false } -func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPLZCNTD256(v *Value) bool { v_0 := v.Args[0] - // match: (XORLconst [1] (SETNE x)) - // result: (SETEQ x) + // match: (VPLZCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTD256load {sym} [off] ptr mem) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - v.reset(OpAMD64SETEQ) - v.AddArg(x) - return true - } - // match: (XORLconst [1] (SETEQ x)) - // result: (SETNE x) - for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - v.reset(OpAMD64SETNE) - v.AddArg(x) + v.reset(OpAMD64VPLZCNTD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (XORLconst [1] (SETL x)) - // result: (SETGE x) + return false +} +func rewriteValueAMD64_OpAMD64VPLZCNTD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPLZCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTD512load {sym} [off] ptr mem) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.reset(OpAMD64SETGE) - v.AddArg(x) - return true - } - // match: (XORLconst [1] (SETGE x)) - // result: (SETL x) - for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - v.reset(OpAMD64SETL) - v.AddArg(x) + v.reset(OpAMD64VPLZCNTD512load) + 
v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (XORLconst [1] (SETLE x)) - // result: (SETG x) + return false +} +func rewriteValueAMD64_OpAMD64VPLZCNTDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPLZCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTDMasked128load {sym} [off] ptr mask mem) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - v.reset(OpAMD64SETG) - v.AddArg(x) - return true - } - // match: (XORLconst [1] (SETG x)) - // result: (SETLE x) - for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - v.reset(OpAMD64SETLE) - v.AddArg(x) + v.reset(OpAMD64VPLZCNTDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (XORLconst [1] (SETB x)) - // result: (SETAE x) + return false +} +func rewriteValueAMD64_OpAMD64VPLZCNTDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPLZCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTDMasked256load {sym} [off] ptr mask mem) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - v.reset(OpAMD64SETAE) - v.AddArg(x) - return true - } - // match: (XORLconst [1] (SETAE x)) - // result: (SETB x) - for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := 
v_0.Args[0] - v.reset(OpAMD64SETB) - v.AddArg(x) + v.reset(OpAMD64VPLZCNTDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (XORLconst [1] (SETBE x)) - // result: (SETA x) + return false +} +func rewriteValueAMD64_OpAMD64VPLZCNTDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPLZCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTDMasked512load {sym} [off] ptr mask mem) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.reset(OpAMD64SETA) - v.AddArg(x) - return true - } - // match: (XORLconst [1] (SETA x)) - // result: (SETBE x) - for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - v.reset(OpAMD64SETBE) - v.AddArg(x) + v.reset(OpAMD64VPLZCNTDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (XORLconst [c] (XORLconst [d] x)) - // result: (XORLconst [c ^ d] x) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64XORLconst { - break - } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(c ^ d) - v.AddArg(x) - return true - } - // match: (XORLconst [0] x) - // result: x + return false +} +func rewriteValueAMD64_OpAMD64VPLZCNTQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPLZCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTQ128load {sym} [off] ptr mem) for { - if auxIntToInt32(v.AuxInt) != 0 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0 - v.copyOf(x) - return true - } - // 
match: (XORLconst [c] (MOVLconst [d])) - // result: (MOVLconst [c^d]) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - d := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(c ^ d) + v.reset(OpAMD64VPLZCNTQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64XORLconstmodify(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPLZCNTQ256(v *Value) bool { v_0 := v.Args[0] - // match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) + // match: (VPLZCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTQ256load {sym} [off] ptr mem) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XORLconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.reset(OpAMD64VPLZCNTQ256load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg2(base, mem) + v.AddArg2(ptr, mem) return true } - // match: (XORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) - // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + return false +} +func 
rewriteValueAMD64_OpAMD64VPLZCNTQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPLZCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTQ512load {sym} [off] ptr mem) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XORLconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) + v.reset(OpAMD64VPLZCNTQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64XORLload(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPLZCNTQMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (XORLload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XORLload [off1+off2] {sym} val base mem) + // match: (VPLZCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTQMasked128load {sym} [off] ptr mask mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if 
!(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XORLload) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64VPLZCNTQMasked128load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) - return true - } - // match: (XORLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (XORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) - for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64XORLload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) - return true - } - // match: (XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) - // result: (XORL x (MOVLf2i y)) - for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { - break - } - y := v_2.Args[1] - if ptr != v_2.Args[0] { - break - } - v.reset(OpAMD64XORL) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32) - v0.AddArg(y) - v.AddArg2(x, v0) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64XORLmodify(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPLZCNTQMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XORLmodify [off1+off2] {sym} base val mem) + // match: (VPLZCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTQMasked256load {sym} [off] ptr mask mem) for { - 
off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XORLmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) + v.reset(OpAMD64VPLZCNTQMasked256load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) + v.AddArg3(ptr, mask, mem) return true } - // match: (XORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (XORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + return false +} +func rewriteValueAMD64_OpAMD64VPLZCNTQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPLZCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPLZCNTQMasked512load {sym} [off] ptr mask mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64XORLmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) + v.reset(OpAMD64VPLZCNTQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func 
rewriteValueAMD64_OpAMD64XORQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXSD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORQ (SHLQ (MOVQconst [1]) y) x) - // result: (BTCQ x y) + // match: (VPMAXSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSD512load {sym} [off] x ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLQ { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { continue } - y := v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { continue } - x := v_1 - v.reset(OpAMD64BTCQ) - v.AddArg2(x, y) + v.reset(OpAMD64VPMAXSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } break } - // match: (XORQ (MOVQconst [c]) x) - // cond: isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31 - // result: (BTCQconst [int8(log64u(uint64(c)))] x) + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSDMasked128load {sym} [off] x ptr mask mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64MOVQconst { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { continue } - c := auxIntToInt64(v_0.AuxInt) - x := v_1 - if !(isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { continue } - v.reset(OpAMD64BTCQconst) - v.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v.AddArg(x) + 
v.reset(OpAMD64VPMAXSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } break } - // match: (XORQ x (MOVQconst [c])) - // cond: is32Bit(c) - // result: (XORQconst [int32(c)] x) + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSDMasked256load {sym} [off] x ptr mask mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { continue } - c := auxIntToInt64(v_1.AuxInt) - if !(is32Bit(c)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { continue } - v.reset(OpAMD64XORQconst) - v.AuxInt = int32ToAuxInt(int32(c)) - v.AddArg(x) + v.reset(OpAMD64VPMAXSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } break } - // match: (XORQ x x) - // result: (MOVLconst [0]) + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSDMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - if x != v_1 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) - return true + break } - // match: (XORQ x l:(MOVQload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (XORQload x [off] {sym} ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQ128load {sym} [off] x ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 l := v_1 - if l.Op != OpAMD64MOVQload { + if l.Op != OpAMD64VMOVDQUload128 { continue } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + if !(canMergeLoad(v, l) && clobber(l)) { continue } - v.reset(OpAMD64XORQload) + v.reset(OpAMD64VPMAXSQ128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -65949,12454 +66484,24822 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool { } break } - // match: (XORQ x (ADDQconst [-1] x)) - // cond: buildcfg.GOAMD64 >= 3 - // result: (BLSMSKQ x) + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQ256load {sym} [off] x ptr mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { continue } - v.reset(OpAMD64BLSMSKQ) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQ256load) + 
v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } break } return false } -func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXSQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORQconst [c] (XORQconst [d] x)) - // result: (XORQconst [c ^ d] x) - for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64XORQconst { - break - } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64XORQconst) - v.AuxInt = int32ToAuxInt(c ^ d) - v.AddArg(x) - return true - } - // match: (XORQconst [0] x) - // result: x + // match: (VPMAXSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQ512load {sym} [off] x ptr mem) for { - if auxIntToInt32(v.AuxInt) != 0 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - x := v_0 - v.copyOf(x) - return true + break } - // match: (XORQconst [c] (MOVQconst [d])) - // result: (MOVQconst [int64(c)^d]) + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQMasked128load {sym} [off] x ptr mask mem) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVQconst { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - d := auxIntToInt64(v_0.AuxInt) - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(int64(c) ^ d) - return true + break } return false } -func rewriteValueAMD64_OpAMD64XORQconstmodify(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXSQMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (XORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) - for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2)) { - break - } - v.reset(OpAMD64XORQconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(sym) - v.AddArg2(base, mem) - return true - } - // match: (XORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) - // result: (XORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) + // match: (VPMAXSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQMasked256load {sym} [off] x ptr mask mem) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - mem := v_1 - if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if 
l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64XORQconstmodify) - v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg2(base, mem) - return true + break } return false } -func rewriteValueAMD64_OpAMD64XORQload(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXSQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (XORQload [off1] {sym} val (ADDQconst [off2] base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XORQload [off1+off2] {sym} val base mem) - for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break - } - v.reset(OpAMD64XORQload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, base, mem) - return true - } - // match: (XORQload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (XORQload [off1+off2] {mergeSym(sym1,sym2)} val base mem) - for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - base := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64XORQload) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, base, mem) - return 
true - } - // match: (XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) - // result: (XORQ x (MOVQf2i y)) + // match: (VPMAXSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQMasked512load {sym} [off] x ptr mask mem) for { - off := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - x := v_0 - ptr := v_1 - if v_2.Op != OpAMD64MOVSDstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { - break - } - y := v_2.Args[1] - if ptr != v_2.Args[0] { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64XORQ) - v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQf2i, typ.UInt64) - v0.AddArg(y) - v.AddArg2(x, v0) - return true + break } return false } -func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPMAXUD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XORQmodify [off1+off2] {sym} base val mem) - for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { - break - } - v.reset(OpAMD64XORQmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(base, val, mem) - return true - } - // match: (XORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) - // result: (XORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (VPMAXUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUD512load {sym} [off] x ptr mem) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_0.AuxInt) - sym2 := auxToSym(v_0.Aux) - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64XORQmodify) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(base, val, mem) - return true + break } return false } -func rewriteValueAMD64_OpAddr(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXUDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Addr {sym} base) - // result: (LEAQ {sym} base) + // match: (VPMAXUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUDMasked128load {sym} [off] x ptr mask mem) for { - sym := auxToSym(v.Aux) - base := v_0 - v.reset(OpAMD64LEAQ) - v.Aux = symToAux(sym) - v.AddArg(base) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && 
clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXUDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicAdd32 ptr val mem) - // result: (AddTupleFirst32 val (XADDLlock val ptr mem)) + // match: (VPMAXUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUDMasked256load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64AddTupleFirst32) - v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg2(val, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXUDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicAdd64 ptr val mem) - // result: (AddTupleFirst64 val (XADDQlock val ptr mem)) + // match: (VPMAXUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUDMasked512load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64AddTupleFirst64) - v0 := 
b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg2(val, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPMAXUQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd32 ptr val mem) - // result: (ANDLlock ptr val mem) + // match: (VPMAXUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQ128load {sym} [off] x ptr mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ANDLlock) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPMAXUQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd32value ptr val mem) - // result: (LoweredAtomicAnd32 ptr val mem) + // match: (VPMAXUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQ256load {sym} [off] x ptr 
mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicAnd32) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPMAXUQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd64value ptr val mem) - // result: (LoweredAtomicAnd64 ptr val mem) + // match: (VPMAXUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQ512load {sym} [off] x ptr mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicAnd64) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMAXUQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd8 ptr val mem) - // result: (ANDBlock ptr val mem) + // match: (VPMAXUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQMasked128load {sym} [off] x ptr mask mem) for 
{ - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ANDBlock) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPMAXUQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicCompareAndSwap32 ptr old new_ mem) - // result: (CMPXCHGLlock ptr old new_ mem) + // match: (VPMAXUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQMasked256load {sym} [off] x ptr mask mem) for { - ptr := v_0 - old := v_1 - new_ := v_2 - mem := v_3 - v.reset(OpAMD64CMPXCHGLlock) - v.AddArg4(ptr, old, new_, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPMAXUQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicCompareAndSwap64 ptr old new_ mem) - // result: (CMPXCHGQlock ptr old new_ mem) - for { - ptr := v_0 - 
old := v_1 - new_ := v_2 - mem := v_3 - v.reset(OpAMD64CMPXCHGQlock) - v.AddArg4(ptr, old, new_, mem) - return true + // match: (VPMAXUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPMINSD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicExchange32 ptr val mem) - // result: (XCHGL val ptr mem) + // match: (VPMINSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSD512load {sym} [off] x ptr mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64XCHGL) - v.AddArg3(val, ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicExchange64 ptr val mem) - // result: (XCHGQ val ptr mem) 
+ // match: (VPMINSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSDMasked128load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64XCHGQ) - v.AddArg3(val, ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicExchange8 ptr val mem) - // result: (XCHGB val ptr mem) + // match: (VPMINSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSDMasked256load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64XCHGB) - v.AddArg3(val, ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoad32 ptr mem) - // 
result: (MOVLatomicload ptr mem) + // match: (VPMINSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSDMasked512load {sym} [off] x ptr mask mem) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVLatomicload) - v.AddArg2(ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoad64 ptr mem) - // result: (MOVQatomicload ptr mem) + // match: (VPMINSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQ128load {sym} [off] x ptr mem) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVQatomicload) - v.AddArg2(ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoad8 ptr mem) - // result: (MOVBatomicload ptr mem) + // match: (VPMINSQ256 x l:(VMOVDQUload256 {sym} [off] 
ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQ256load {sym} [off] x ptr mem) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVBatomicload) - v.AddArg2(ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoadPtr ptr mem) - // result: (MOVQatomicload ptr mem) + // match: (VPMINSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQ512load {sym} [off] x ptr mem) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVQatomicload) - v.AddArg2(ptr, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicOr32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr32 ptr val mem) - // result: (ORLlock ptr val mem) + // match: (VPMINSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQMasked128load {sym} [off] x 
ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ORLlock) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr32value ptr val mem) - // result: (LoweredAtomicOr32 ptr val mem) + // match: (VPMINSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQMasked256load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicOr32) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINSQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr64value ptr val mem) - // result: (LoweredAtomicOr64 ptr val mem) + // match: (VPMINSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: 
canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQMasked512load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicOr64) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicOr8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPMINUD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr8 ptr val mem) - // result: (ORBlock ptr val mem) + // match: (VPMINUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUD512load {sym} [off] x ptr mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ORBlock) - v.AddArg3(ptr, val, mem) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicStore32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINUDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStore32 ptr val mem) - // result: (Select1 (XCHGL val ptr mem)) + // match: (VPMINUDMasked128 x l:(VMOVDQUload128 
{sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUDMasked128load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpAtomicStore64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINUDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStore64 ptr val mem) - // result: (Select1 (XCHGQ val ptr mem)) + // match: (VPMINUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUDMasked256load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func 
rewriteValueAMD64_OpAtomicStore8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINUDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStore8 ptr val mem) - // result: (Select1 (XCHGB val ptr mem)) + // match: (VPMINUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUDMasked512load {sym} [off] x ptr mask mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStorePtrNoWB ptr val mem) - // result: (Select1 (XCHGQ val ptr mem)) + // match: (VPMINUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQ128load {sym} [off] x ptr mem) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + 
off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpBitLen16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINUQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen16 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSRL (LEAL1 [1] (MOVWQZX x) (MOVWQZX x))) + // match: (VPMINUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQ256load {sym} [off] x ptr mem) for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64BSRL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) - v1.AddArg(x) - v0.AddArg2(v1, v1) - v.AddArg(v0) - return true + break } - // match: (BitLen16 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVWQZX x)))) + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQ512load {sym} [off] x ptr mem) for { - t := v.Type - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = 
_i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-32) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) - v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) - return true + break } return false } -func rewriteValueAMD64_OpBitLen32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINUQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen32 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (Select0 (BSRQ (LEAQ1 [1] (MOVLQZX x) (MOVLQZX x)))) + // match: (VPMINUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQMasked128load {sym} [off] x ptr mask mem) for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64) - v1.AuxInt = int32ToAuxInt(1) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64) - v2.AddArg(x) - v1.AddArg2(v2, v2) - 
v0.AddArg(v1) - v.AddArg(v0) - return true + break } - // match: (BitLen32 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-32] (LZCNTL x))) + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-32) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) - v1.AddArg(x) - v0.AddArg(v1) - v.AddArg(v0) - return true + break } return false } -func rewriteValueAMD64_OpBitLen64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMINUQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen64 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (ADDQconst [1] (CMOVQEQ (Select0 (BSRQ x)) (MOVQconst [-1]) (Select1 (BSRQ x)))) + // match: (VPMINUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type - x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + 
off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(1) - v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t) - v1 := b.NewValue0(v.Pos, OpSelect0, t) - v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v2.AddArg(x) - v1.AddArg(v2) - v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) - v3.AuxInt = int64ToAuxInt(-1) - v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4.AddArg(v2) - v0.AddArg3(v1, v3, v4) - v.AddArg(v0) - return true + break } - // match: (BitLen64 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-64] (LZCNTQ x))) + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec16x16ToM (VPMOVMToVec16x16 x)) + // result: x for { - t := v.Type - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + if v_0.Op != OpAMD64VPMOVMToVec16x16 { break } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-64) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64) - v1.AddArg(x) - v0.AddArg(v1) - v.AddArg(v0) + x := v_0.Args[0] + v.copyOf(x) return true } return false } -func rewriteValueAMD64_OpBitLen8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec16x32ToM(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen8 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSRL (LEAL1 [1] (MOVBQZX x) (MOVBQZX x))) + // match: (VPMOVVec16x32ToM (VPMOVMToVec16x32 x)) + // result: x for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + if v_0.Op != OpAMD64VPMOVMToVec16x32 { break } - v.reset(OpAMD64BSRL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1) 
- v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) - v1.AddArg(x) - v0.AddArg2(v1, v1) - v.AddArg(v0) + x := v_0.Args[0] + v.copyOf(x) return true } - // match: (BitLen8 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVBQZX x)))) + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec16x8ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec16x8ToM (VPMOVMToVec16x8 x)) + // result: x for { - t := v.Type - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + if v_0.Op != OpAMD64VPMOVMToVec16x8 { break } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-32) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) - v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) + x := v_0.Args[0] + v.copyOf(x) return true } return false } -func rewriteValueAMD64_OpBswap16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec32x16ToM(v *Value) bool { v_0 := v.Args[0] - // match: (Bswap16 x) - // result: (ROLWconst [8] x) + // match: (VPMOVVec32x16ToM (VPMOVMToVec32x16 x)) + // result: x for { - x := v_0 - v.reset(OpAMD64ROLWconst) - v.AuxInt = int8ToAuxInt(8) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec32x16 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeil(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec32x4ToM(v *Value) bool { v_0 := v.Args[0] - // match: (Ceil x) - // result: (ROUNDSD [2] x) + // match: (VPMOVVec32x4ToM (VPMOVMToVec32x4 x)) + // result: x for { - x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec32x4 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec32x8ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilFloat32x4 x) - // result: (VROUNDPS128 [2] x) + // match: 
(VPMOVVec32x8ToM (VPMOVMToVec32x8 x)) + // result: x for { - x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec32x8 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec64x2ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilFloat32x8 x) - // result: (VROUNDPS256 [2] x) + // match: (VPMOVVec64x2ToM (VPMOVMToVec64x2 x)) + // result: x for { - x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec64x2 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec64x4ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilFloat64x2 x) - // result: (VROUNDPD128 [2] x) + // match: (VPMOVVec64x4ToM (VPMOVMToVec64x4 x)) + // result: x for { - x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec64x4 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec64x8ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilFloat64x4 x) - // result: (VROUNDPD256 [2] x) + // match: (VPMOVVec64x8ToM (VPMOVMToVec64x8 x)) + // result: x for { - x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec64x8 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec8x16ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilScaledFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+2] x) + // match: (VPMOVVec8x16ToM (VPMOVMToVec8x16 x)) + // result: x for { - a := 
auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec8x16 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilScaledFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+2] x) + // match: (VPMOVVec8x32ToM (VPMOVMToVec8x32 x)) + // result: x for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec8x32 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool { v_0 := v.Args[0] - // match: (CeilScaledFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+2] x) + // match: (VPMOVVec8x64ToM (VPMOVMToVec8x64 x)) + // result: x for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) + if v_0.Op != OpAMD64VPMOVMToVec8x64 { + break + } + x := v_0.Args[0] + v.copyOf(x) return true } + return false } -func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLD512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+2] x) + // match: (VPMULLD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLD512load {sym} [off] x ptr mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+2] x) + // match: (VPMULLDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLDMasked128load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+2] x) + // match: (VPMULLDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLDMasked256load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = 
_i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledResidueFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+2] x) + // match: (VPMULLDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLDMasked512load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledResidueFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+2] x) + // match: (VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQ128load {sym} [off] x ptr mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS128) - 
v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledResidueFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+2] x) + // match: (VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQ256load {sym} [off] x ptr mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledResidueFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+2] x) + // match: (VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQ512load {sym} [off] x ptr mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD128) - 
v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledResidueFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+2] x) + // match: (VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQMasked128load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilScaledResidueFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+2] x) + // match: (VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: 
(VPMULLQMasked256load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = uint8ToAuxInt(a + 2) - v.AddArg(x) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPMULLQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressFloat32x16 x mask) - // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM mask)) + // match: (VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCOMPRESSPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPOPCNTD128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: 
(CompressFloat32x4 x mask) - // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask)) + // match: (VPOPCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTD128load {sym} [off] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCOMPRESSPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPOPCNTD256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressFloat32x8 x mask) - // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM mask)) + // match: (VPOPCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTD256load {sym} [off] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCOMPRESSPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPOPCNTD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressFloat64x2 x mask) - // result: (VCOMPRESSPDMasked128 x 
(VPMOVVec64x2ToM mask)) + // match: (VPOPCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTD512load {sym} [off] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VCOMPRESSPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOPCNTDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressFloat64x4 x mask) - // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM mask)) + // match: (VPOPCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTDMasked128load {sym} [off] ptr mask mem) for { - x := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCOMPRESSPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOPCNTDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressFloat64x8 x mask) - // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM mask)) + // match: 
(VPOPCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTDMasked256load {sym} [off] ptr mask mem) for { - x := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VCOMPRESSPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOPCNTDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressInt16x16 x mask) - // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)) + // match: (VPOPCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTDMasked512load {sym} [off] ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPOPCNTQ128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressInt16x32 x mask) - // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)) + // 
match: (VPOPCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQ128load {sym} [off] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPOPCNTQ256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressInt16x8 x mask) - // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)) + // match: (VPOPCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQ256load {sym} [off] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPOPCNTQ512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressInt32x16 x mask) - // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)) + // match: (VPOPCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: 
canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQ512load {sym} [off] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOPCNTQMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressInt32x4 x mask) - // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)) + // match: (VPOPCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQMasked128load {sym} [off] ptr mask mem) for { - x := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPCOMPRESSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOPCNTQMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressInt32x8 x mask) - // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)) + // match: (VPOPCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + 
// result: (VPOPCNTQMasked256load {sym} [off] ptr mask mem) for { - x := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPCOMPRESSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressInt64x2 x mask) - // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) + // match: (VPOPCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQMasked512load {sym} [off] ptr mask mem) for { - x := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPCOMPRESSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOR128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressInt64x4 x mask) - // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) + // match: (VPOR128 (VCMPPS128 [3] x x) (VCMPPS128 [3] y y)) + // result: (VCMPPS128 [3] x y) for { - x := v_0 - mask := v_1 - 
v.reset(OpAMD64VPCOMPRESSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS128 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break } -} -func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CompressInt64x8 x mask) - // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) + // match: (VPOR128 (VCMPPD128 [3] x x) (VCMPPD128 [3] y y)) + // result: (VCMPPD128 [3] x y) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD128 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPOR256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressInt8x16 x mask) - // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)) + // match: (VPOR256 (VCMPPS256 [3] x x) (VCMPPS256 [3] y y)) + // result: (VCMPPS256 [3] x y) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPS256 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break } -} -func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CompressInt8x32 x mask) - // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)) + // match: (VPOR256 (VCMPPD256 [3] x x) (VCMPPD256 [3] y y)) + // result: (VCMPPD256 [3] x y) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_0.AuxInt) != 3 { + continue + } + x := v_0.Args[1] + if x != v_0.Args[0] || v_1.Op != OpAMD64VCMPPD256 || auxIntToUint8(v_1.AuxInt) != 3 { + continue + } + y := v_1.Args[1] + if y != v_1.Args[0] { + continue + } + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, y) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (CompressInt8x64 x mask) - // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (VPORD512 (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) (VPMOVMToVec32x16 (VCMPPS512 [3] y y))) + // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_0_0.AuxInt) != 3 { + continue + } + x := v_0_0.Args[1] + if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64VCMPPS512 || auxIntToUint8(v_1_0.AuxInt) != 3 { + continue + } + y := v_1_0.Args[1] + if y != v_1_0.Args[0] { + continue + } + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(3) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break } -} -func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CompressUint16x16 x mask) - // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)) + // match: (VPORD512 (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) (VPMOVMToVec64x8 (VCMPPD512 [3] y y))) + // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_0_0.AuxInt) != 3 { + continue + } + x := v_0_0.Args[1] + if x != v_0_0.Args[0] || v_1.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpAMD64VCMPPD512 || auxIntToUint8(v_1_0.AuxInt) != 3 { + continue + } + y := v_1_0.Args[1] + if y != v_1_0.Args[0] { + continue + } + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(3) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break } -} -func 
rewriteValueAMD64_OpCompressUint16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (CompressUint16x32 x mask) - // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)) + // match: (VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORD512load {sym} [off] x ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint16x8 x mask) - // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)) + // match: (VPORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORDMasked128load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORDMasked128load) + 
v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint32x16 x mask) - // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)) + // match: (VPORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORDMasked256load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint32x4 x mask) - // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)) + // match: (VPORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORDMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := 
v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint32x8 x mask) - // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)) + // match: (VPORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQ512load {sym} [off] x ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORQMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint64x2 x mask) - // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) + // match: (VPORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQMasked128load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSQMasked128) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORQMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint64x4 x mask) - // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) + // match: (VPORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQMasked256load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPORQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CompressUint64x8 x mask) - // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) 
+ // match: (VPORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressUint8x16 x mask) - // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)) + // match: (VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool { - v_1 := v.Args[1] +func 
rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressUint8x32 x mask) - // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)) + // match: (VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (CompressUint8x64 x mask) - // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)) + // match: (VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPCOMPRESSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true 
} + return false } -func rewriteValueAMD64_OpCondSelect(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CondSelect x y (SETEQ cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQEQ y x cond) + // match: (VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQ { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQEQ) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETNE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQNE) - v.AddArg3(y, x, 
cond) + v.reset(OpAMD64VPROLDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETL cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQLT y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETL { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQLT) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETG cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGT y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETG { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - 
v.reset(OpAMD64CMOVQGT) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETLE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQLE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETLE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQLE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETGE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQGE) - v.AddArg3(y, x, cond) + 
v.reset(OpAMD64VPROLQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETA cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQHI y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETA { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQHI) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETB cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQCS y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETB { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) 
&& clobber(l)) { break } - v.reset(OpAMD64CMOVQCS) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETAE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQCC y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETAE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQCC) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETBE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQLS y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVD128load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETBE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && 
clobber(l)) { break } - v.reset(OpAMD64CMOVQLS) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETEQF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQEQF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVD256load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQEQF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETNEF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNEF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVD512load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNEF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQNEF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return 
true } - // match: (CondSelect x y (SETGF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGTF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVDMasked128load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQGTF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETGEF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGEF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVDMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGEF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQGEF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETEQ cond)) 
- // cond: is32BitInt(t) - // result: (CMOVLEQ y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVDMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQ { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLEQ) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETNE cond)) - // cond: is32BitInt(t) - // result: (CMOVLNE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQ128load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLNE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETL cond)) - // cond: is32BitInt(t) - // result: (CMOVLLT y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // 
match: (VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQ256load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETL { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLLT) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETG cond)) - // cond: is32BitInt(t) - // result: (CMOVLGT y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQ512load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETG { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLGT) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETLE cond)) - // cond: is32BitInt(t) - // result: (CMOVLLE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQMasked128load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op 
!= OpAMD64SETLE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLLE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETGE cond)) - // cond: is32BitInt(t) - // result: (CMOVLGE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLGE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETA cond)) - // cond: is32BitInt(t) - // result: (CMOVLHI y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETA { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if 
!(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLHI) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPROLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETB cond)) - // cond: is32BitInt(t) - // result: (CMOVLCS y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETB { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLCS) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETAE cond)) - // cond: is32BitInt(t) - // result: (CMOVLCC y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETAE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLCC) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETBE cond)) - // cond: is32BitInt(t) - // result: (CMOVLLS y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETBE { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64CMOVLLS) - v.AddArg3(y, x, cond) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPRORD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETEQF cond)) - // cond: is32BitInt(t) - // result: (CMOVLEQF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQF { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if 
!(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLEQF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETNEF cond)) - // cond: is32BitInt(t) - // result: (CMOVLNEF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNEF { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLNEF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETGF cond)) - // cond: is32BitInt(t) - // result: (CMOVLGTF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGF { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLGTF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETGEF cond)) - // cond: is32BitInt(t) - // result: (CMOVLGEF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGEF { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLGEF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETEQ cond)) - // cond: is16BitInt(t) - // result: (CMOVWEQ y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQ { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { 
break } - v.reset(OpAMD64CMOVWEQ) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETNE cond)) - // cond: is16BitInt(t) - // result: (CMOVWNE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWNE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (CondSelect x y (SETL cond)) - // cond: is16BitInt(t) - // result: (CMOVWLT y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETL { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWLT) - 
v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETG cond)) - // cond: is16BitInt(t) - // result: (CMOVWGT y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETG { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWGT) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETLE cond)) - // cond: is16BitInt(t) - // result: (CMOVWLE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETLE { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) 
{ break } - v.reset(OpAMD64CMOVWLE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (CondSelect x y (SETGE cond)) - // cond: is16BitInt(t) - // result: (CMOVWGE y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVD128load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWGE) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETA cond)) - // cond: is16BitInt(t) - // result: (CMOVWHI y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVD256load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETA { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWHI) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect 
x y (SETB cond)) - // cond: is16BitInt(t) - // result: (CMOVWCS y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVD512load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETB { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWCS) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETAE cond)) - // cond: is16BitInt(t) - // result: (CMOVWCC y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVDMasked128load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETAE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWCC) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETBE cond)) - // cond: is16BitInt(t) - // result: (CMOVWLS y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 
:= v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVDMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETBE { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWLS) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETEQF cond)) - // cond: is16BitInt(t) - // result: (CMOVWEQF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVDMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWEQF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y (SETNEF cond)) - // cond: is16BitInt(t) - // result: (CMOVWNEF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, 
l) && clobber(l) + // result: (VPRORVQ128load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNEF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWNEF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETGF cond)) - // cond: is16BitInt(t) - // result: (CMOVWGTF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQ256load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWGTF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y (SETGEF cond)) - // cond: is16BitInt(t) - // result: (CMOVWGEF y x cond) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQ512load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGEF { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - cond := v_2.Args[0] - if !(is16BitInt(t)) { + off 
:= auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWGEF) - v.AddArg3(y, x, cond) + v.reset(OpAMD64VPRORVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x (CMPQconst [0] check)) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQMasked128load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64CMOVQNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t) - // result: (CMOVLNE y x (CMPQconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) + v.reset(OpAMD64VPRORVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y check) - // cond: 
!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t) - // result: (CMOVWNE y x (CMPQconst [0] check)) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64CMOVWNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x (CMPLconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) + v.reset(OpAMD64VPRORVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t) - // result: (CMOVLNE y x (CMPLconst [0] check)) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPRORVQMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64CMOVLNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t) - // result: (CMOVWNE y x (CMPLconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) + v.reset(OpAMD64VPRORVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x (CMPWconst [0] check)) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t))) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64CMOVQNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - 
v0.AuxInt = int16ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t) - // result: (CMOVLNE y x (CMPWconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVLNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v0.AuxInt = int16ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) + v.reset(OpAMD64VPSHLDD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t) - // result: (CMOVWNE y x (CMPWconst [0] check)) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64CMOVWNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v0.AuxInt = int16ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x (CMPBconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && 
check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t))) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) + v.reset(OpAMD64VPSHLDD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t) - // result: (CMOVLNE y x (CMPBconst [0] check)) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64CMOVLNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 1 && is16BitInt(t) - // result: (CMOVWNE y x (CMPBconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 1 && is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVWNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) + 
v.reset(OpAMD64VPSHLDD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpConst16(v *Value) bool { - // match: (Const16 [c]) - // result: (MOVLconst [int32(c)]) - for { - c := auxIntToInt16(v.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(c)) - return true - } -} -func rewriteValueAMD64_OpConst8(v *Value) bool { - // match: (Const8 [c]) - // result: (MOVLconst [int32(c)]) - for { - c := auxIntToInt8(v.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(c)) - return true - } -} -func rewriteValueAMD64_OpConstBool(v *Value) bool { - // match: (ConstBool [c]) - // result: (MOVLconst [b2i32(c)]) - for { - c := auxIntToBool(v.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(b2i32(c)) - return true - } -} -func rewriteValueAMD64_OpConstNil(v *Value) bool { - // match: (ConstNil ) - // result: (MOVQconst [0]) - for { - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(0) - return true - } -} -func rewriteValueAMD64_OpCtz16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz16 x) - // result: (BSFL (ORLconst [1<<16] x)) + // match: (VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64BSFL) - v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1 << 16) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPSHLDDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Ctz16NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) + // match: (VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz16NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSFL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64BSFL) - v.AddArg(x) + v.reset(OpAMD64VPSHLDDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpCtz32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz32 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) + // match: (VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != 
OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz32 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (Select0 (BSFQ (BTSQconst [32] x))) - for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64) - v1.AuxInt = int8ToAuxInt(32) - v1.AddArg(x) - v0.AddArg(v1) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Ctz32NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) + // match: (VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz32NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSFL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64BSFL) - v.AddArg(x) + v.reset(OpAMD64VPSHLDQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpCtz64(v *Value) 
bool { +func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz64 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTQ x) + // match: (VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64TZCNTQ) - v.AddArg(x) - return true - } - // match: (Ctz64 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x))) - for { - t := v.Type - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CMOVQEQ) - v0 := b.NewValue0(v.Pos, OpSelect0, t) - v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1.AddArg(x) - v0.AddArg(v1) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) - v2.AuxInt = int64ToAuxInt(64) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v3.AddArg(v1) - v.AddArg3(v0, v2, v3) + v.reset(OpAMD64VPSHLDQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz64NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTQ x) + // match: (VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { + c := 
auxIntToUint8(v.AuxInt) x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64TZCNTQ) - v.AddArg(x) - return true - } - // match: (Ctz64NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (Select0 (BSFQ x)) - for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpCtz8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz8 x) - // result: (BSFL (ORLconst [1<<8 ] x)) + // match: (VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64BSFL) - v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1 << 8) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) 
bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Ctz8NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) + // match: (VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if !(buildcfg.GOAMD64 >= 3) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz8NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSFL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64BSFL) - v.AddArg(x) + v.reset(OpAMD64VPSHLDQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpCvt16toMask16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt16toMask16x16 x) - // result: (VPMOVMToVec16x16 (KMOVWk x)) + // match: (VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPMOVMToVec16x16) - v.Type = types.TypeVec256 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPSHLDQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvt16toMask32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt16toMask32x16 x) - // result: (VPMOVMToVec32x16 (KMOVWk x)) + // match: (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVD128load {sym} [off] x y ptr mem) for { - t := v.Type x := v_0 - v.reset(OpAMD64VPMOVMToVec32x16) - v.Type = types.TypeVec512 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt16toMask8x16 x) - // result: (VPMOVMToVec8x16 (KMOVWk x)) + // match: (VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVD256load {sym} [off] x y ptr mem) for { - t := v.Type x := v_0 - v.reset(OpAMD64VPMOVMToVec8x16) - v.Type = types.TypeVec128 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPSHLDVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvt32Fto32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Cvt32Fto32 x) - // cond: base.ConvertHash.MatchPos(v.Pos, nil) - // result: (XORL y (SARLconst [31] (ANDL y:(CVTTSS2SL x) (NOTL (MOVLf2i x))))) + // match: (VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVD512load {sym} [off] x y ptr mem) for { - t := v.Type x := v_0 - if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64XORL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64SARLconst, t) - v0.AuxInt = int8ToAuxInt(31) - v1 := b.NewValue0(v.Pos, OpAMD64ANDL, t) - y := b.NewValue0(v.Pos, OpAMD64CVTTSS2SL, t) - y.AddArg(x) - v3 := b.NewValue0(v.Pos, OpAMD64NOTL, typ.Int32) - v4 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32) - v4.AddArg(x) - v3.AddArg(v4) - v1.AddArg2(y, v3) - v0.AddArg(v1) - v.AddArg2(y, v0) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (Cvt32Fto32 x) - // cond: !base.ConvertHash.MatchPos(v.Pos, nil) - // result: (CVTTSS2SL x) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem) for { - t := v.Type x 
:= v_0 - if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64CVTTSS2SL) - v.Type = t - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpCvt32Fto64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Cvt32Fto64 x) - // cond: base.ConvertHash.MatchPos(v.Pos, nil) - // result: (XORQ y (SARQconst [63] (ANDQ y:(CVTTSS2SQ x) (NOTQ (MOVQf2i (CVTSS2SD x))) ))) + // match: (VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem) for { - t := v.Type x := v_0 - if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64XORQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64SARQconst, t) - v0.AuxInt = int8ToAuxInt(63) - v1 := b.NewValue0(v.Pos, OpAMD64ANDQ, t) - y := b.NewValue0(v.Pos, OpAMD64CVTTSS2SQ, t) - y.AddArg(x) - v3 := b.NewValue0(v.Pos, OpAMD64NOTQ, typ.Int64) - v4 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64) - v5 := b.NewValue0(v.Pos, OpAMD64CVTSS2SD, typ.Float64) - v5.AddArg(x) - v4.AddArg(v5) - v3.AddArg(v4) - v1.AddArg2(y, v3) - v0.AddArg(v1) - v.AddArg2(y, v0) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (Cvt32Fto64 x) - // cond: !base.ConvertHash.MatchPos(v.Pos, nil) - // result: (CVTTSS2SQ x) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem) for { - t := v.Type x := v_0 - if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64CVTTSS2SQ) - v.Type = t - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpCvt32toMask16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQ128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt32toMask16x32 x) - // result: (VPMOVMToVec16x32 (KMOVDk x)) + // match: (VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ128load {sym} [off] x y ptr mem) for { - t := v.Type x := v_0 - v.reset(OpAMD64VPMOVMToVec16x32) - v.Type = types.TypeVec512 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } 
-func rewriteValueAMD64_OpCvt32toMask8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQ256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt32toMask8x32 x) - // result: (VPMOVMToVec8x32 (KMOVDk x)) + // match: (VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ256load {sym} [off] x y ptr mem) for { - t := v.Type x := v_0 - v.reset(OpAMD64VPMOVMToVec8x32) - v.Type = types.TypeVec256 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvt64Fto32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQ512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Cvt64Fto32 x) - // cond: base.ConvertHash.MatchPos(v.Pos, nil) - // result: (XORL y (SARLconst [31] (ANDL y:(CVTTSD2SL x) (NOTL (MOVLf2i (CVTSD2SS x)))))) + // match: (VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ512load {sym} [off] x y ptr mem) for { - t := v.Type x := v_0 - if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64XORL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64SARLconst, t) - v0.AuxInt = int8ToAuxInt(31) - v1 := b.NewValue0(v.Pos, OpAMD64ANDL, t) - y := b.NewValue0(v.Pos, OpAMD64CVTTSD2SL, t) - y.AddArg(x) - v3 := b.NewValue0(v.Pos, OpAMD64NOTL, typ.Int32) - v4 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32) - v5 := 
b.NewValue0(v.Pos, OpAMD64CVTSD2SS, typ.Float32) - v5.AddArg(x) - v4.AddArg(v5) - v3.AddArg(v4) - v1.AddArg2(y, v3) - v0.AddArg(v1) - v.AddArg2(y, v0) - return true - } - // match: (Cvt64Fto32 x) - // cond: !base.ConvertHash.MatchPos(v.Pos, nil) - // result: (CVTTSD2SL x) - for { - t := v.Type - x := v_0 - if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CVTTSD2SL) - v.Type = t - v.AddArg(x) + v.reset(OpAMD64VPSHLDVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpCvt64Fto64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Cvt64Fto64 x) - // cond: base.ConvertHash.MatchPos(v.Pos, nil) - // result: (XORQ y (SARQconst [63] (ANDQ y:(CVTTSD2SQ x) (NOTQ (MOVQf2i x))))) + // match: (VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem) for { - t := v.Type x := v_0 - if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64XORQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64SARQconst, t) - v0.AuxInt = int8ToAuxInt(63) - v1 := b.NewValue0(v.Pos, OpAMD64ANDQ, t) - y := b.NewValue0(v.Pos, OpAMD64CVTTSD2SQ, t) - y.AddArg(x) - v3 := b.NewValue0(v.Pos, OpAMD64NOTQ, typ.Int64) - v4 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64) - v4.AddArg(x) - v3.AddArg(v4) - v1.AddArg2(y, v3) - v0.AddArg(v1) - v.AddArg2(y, v0) - return true - } - // match: (Cvt64Fto64 x) - // cond: !base.ConvertHash.MatchPos(v.Pos, nil) - // result: (CVTTSD2SQ x) - for { - t := v.Type - x := v_0 
- if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64CVTTSD2SQ) - v.Type = t - v.AddArg(x) + v.reset(OpAMD64VPSHLDVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpCvt64toMask8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt64toMask8x64 x) - // result: (VPMOVMToVec8x64 (KMOVQk x)) + // match: (VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem) for { - t := v.Type x := v_0 - v.reset(OpAMD64VPMOVMToVec8x64) - v.Type = types.TypeVec512 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVQk, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvt8toMask16x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt8toMask16x8 x) - // result: (VPMOVMToVec16x8 (KMOVBk x)) + // match: (VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem) for { - t := v.Type x := v_0 - 
v.reset(OpAMD64VPMOVMToVec16x8) - v.Type = types.TypeVec128 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvt8toMask32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt8toMask32x4 x) - // result: (VPMOVMToVec32x4 (KMOVBk x)) + // match: (VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPMOVMToVec32x4) - v.Type = types.TypeVec128 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvt8toMask32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt8toMask32x8 x) - // result: (VPMOVMToVec32x8 (KMOVBk x)) + // match: (VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) 
for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPMOVMToVec32x8) - v.Type = types.TypeVec256 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvt8toMask64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt8toMask64x2 x) - // result: (VPMOVMToVec64x2 (KMOVBk x)) + // match: (VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPMOVMToVec64x2) - v.Type = types.TypeVec128 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvt8toMask64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt8toMask64x4 x) - // result: (VPMOVMToVec64x4 (KMOVBk x)) + // match: (VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPMOVMToVec64x4) - v.Type = types.TypeVec256 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Cvt8toMask64x8 x) - // result: (VPMOVMToVec64x8 (KMOVBk x)) + // match: (VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - t := v.Type + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPMOVMToVec64x8) - v.Type = types.TypeVec512 - v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] 
- b := v.Block - // match: (CvtMask16x16to16 x) - // result: (KMOVWi (VPMOVVec16x16ToM x)) + // match: (VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64KMOVWi) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CvtMask16x32to32 x) - // result: (KMOVDi (VPMOVVec16x32ToM x)) + // match: (VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64KMOVDi) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CvtMask16x8to8 x) - // result: (KMOVBi (VPMOVVec16x8ToM x)) + // match: (VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64KMOVBi) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CvtMask32x16to16 x) - // result: (KMOVWi (VPMOVVec32x16ToM x)) + // match: (VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64KMOVWi) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool { 
+func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CvtMask64x8to8 x) - // result: (KMOVBi (VPMOVVec64x8ToM x)) + // match: (VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64KMOVBi) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (CvtMask8x64to64 x) - // result: (KMOVQi (VPMOVVec8x64ToM x)) + // match: (VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64KMOVQi) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(x) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) 
+ v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpDiv16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div16 [a] x y) - // result: (Select0 (DIVW [a] x y)) + // match: (VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mask mem) for { - a := auxIntToBool(v.AuxInt) + c := auxIntToUint8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpDiv16u(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVD128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div16u x y) - // result: (Select0 (DIVWU x y)) + // match: (VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVD128load {sym} [off] x y ptr mem) for { x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpDiv32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVD256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div32 [a] x y) - // result: (Select0 (DIVL [a] x y)) + // match: (VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVD256load {sym} [off] x y ptr mem) for { - a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpDiv32u(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVD512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div32u x y) - // result: (Select0 (DIVLU x y)) + // match: (VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVD512load {sym} [off] x y ptr mem) for { x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if 
!(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpDiv64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div64 [a] x y) - // result: (Select0 (DIVQ [a] x y)) + // match: (VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem) for { - a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpDiv64u(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div64u x y) - // result: (Select0 (DIVQU x y)) + // match: (VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off 
:= auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpDiv8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div8 x y) - // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y))) + // match: (VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpDiv8u(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQ128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div8u x y) - // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) + // match: (VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQ128load {sym} [off] x y ptr mem) for { x := v_0 y := v_1 
- v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpEq16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQ256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Eq16 x y) - // result: (SETEQ (CMPW x y)) + // match: (VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQ256load {sym} [off] x y ptr mem) for { x := v_0 y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpEq32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQ512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Eq32 x y) - // result: (SETEQ (CMPL x y)) + // match: (VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQ512load {sym} [off] x y ptr mem) for { x := v_0 y := v_1 - v.reset(OpAMD64SETEQ) - v0 := 
b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpEq32F(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Eq32F x y) - // result: (SETEQF (UCOMISS x y)) + // match: (VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 y := v_1 - v.reset(OpAMD64SETEQF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEq64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Eq64 x y) - // result: (SETEQ (CMPQ x y)) + // match: (VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - 
v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEq64F(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Eq64F x y) - // result: (SETEQF (UCOMISD x y)) + // match: (VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 y := v_1 - v.reset(OpAMD64SETEQF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEq8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Eq8 x y) - // result: (SETEQ (CMPB x y)) + // match: (VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != 
OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpEqB(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (EqB x y) - // result: (SETEQ (CMPB x y)) + // match: (VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEqPtr(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (EqPtr x y) - // result: (SETEQ (CMPQ x y)) + // match: (VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := 
v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) + // match: (VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (EqualFloat32x4 x y) - // result: (VCMPPS128 [0] x y) + // match: (VPSLLD128 x (MOVQconst [c])) + // result: (VPSLLD128const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := 
auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (EqualFloat32x8 x y) - // result: (VCMPPS256 [0] x y) + // match: (VPSLLD256 x (MOVQconst [c])) + // result: (VPSLLD256const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (EqualFloat64x2 x y) - // result: (VCMPPD128 [0] x y) + // match: (VPSLLD512 x (MOVQconst [c])) + // result: (VPSLLD512const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool { v_0 := v.Args[0] - // match: (EqualFloat64x4 x y) - // result: (VCMPPD256 [0] x y) + // match: (VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLD512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y)) + // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked128const [uint8(c)] x mask) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y)) + // match: (VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && 
clobber(l)) { + break + } + v.reset(OpAMD64VPSLLDMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y)) + // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked256const [uint8(c)] x mask) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) + // match: (VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLDMasked256constload) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y)) + // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked512const [uint8(c)] x mask) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y)) + // match: (VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLDMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) 
return true } + return false } -func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y)) + // match: (VPSLLQ128 x (MOVQconst [c])) + // result: (VPSLLQ128const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) + // match: (VPSLLQ256 x (MOVQconst [c])) + // result: (VPSLLQ256const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y)) + // match: (VPSLLQ512 x (MOVQconst [c])) + // result: (VPSLLQ512const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_1.Op != OpAMD64MOVQconst { + break + 
} + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (ExpandFloat32x16 x mask) - // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM mask)) + // match: (VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VEXPANDPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLQ512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandFloat32x4 x mask) - // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM mask)) + // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked128const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VEXPANDPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return 
false } -func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandFloat32x8 x mask) - // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM mask)) + // match: (VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VEXPANDPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLQMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandFloat64x2 x mask) - // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM mask)) + // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked256const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VEXPANDPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandFloat64x4 x mask) - // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM mask)) + // match: (VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VEXPANDPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLQMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandFloat64x8 x mask) - // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM mask)) + // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked512const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VEXPANDPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - 
// match: (ExpandInt16x16 x mask) - // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM mask)) + // match: (VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPEXPANDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLQMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt16x32 x mask) - // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM mask)) + // match: (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVD512load {sym} [off] x ptr mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v *Value) bool { + 
v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt16x8 x mask) - // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM mask)) + // match: (VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt32x16 x mask) - // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM mask)) + // match: (VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt32x4 x mask) - // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM mask)) + // match: (VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt32x8 x mask) - // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM mask)) + // match: (VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQ512load {sym} [off] x ptr mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSLLVQMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt64x2 x mask) - // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM mask)) + // match: (VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVQMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt64x4 x mask) - // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM mask)) + // match: (VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func 
rewriteValueAMD64_OpExpandInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt64x8 x mask) - // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM mask)) + // match: (VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt8x16 x mask) - // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM mask)) + // match: (VPSLLW128 x (MOVQconst [c])) + // result: (VPSLLW128const [uint8(c)] x) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt8x32 x mask) - // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM mask)) + // match: 
(VPSLLW256 x (MOVQconst [c])) + // result: (VPSLLW256const [uint8(c)] x) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandInt8x64 x mask) - // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM mask)) + // match: (VPSLLW512 x (MOVQconst [c])) + // result: (VPSLLW512const [uint8(c)] x) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint16x16 x mask) - // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM mask)) + // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked128const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint16x32 x mask) - // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM mask)) + // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked256const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint16x8 x mask) - // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM mask)) + // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked512const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint32x16 x mask) - // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM mask)) + // match: (VPSRAD128 x (MOVQconst [c])) + // result: (VPSRAD128const [uint8(c)] x) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - 
v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint32x4 x mask) - // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM mask)) + // match: (VPSRAD256 x (MOVQconst [c])) + // result: (VPSRAD256const [uint8(c)] x) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint32x8 x mask) - // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM mask)) + // match: (VPSRAD512 x (MOVQconst [c])) + // result: (VPSRAD512const [uint8(c)] x) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint64x2 x mask) - // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM mask)) + // match: (VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: 
canMergeLoad(v, l) && clobber(l) + // result: (VPSRAD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAD512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint64x4 x mask) - // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM mask)) + // match: (VPSRADMasked128 x (MOVQconst [c]) mask) + // result: (VPSRADMasked128const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint64x8 x mask) - // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM mask)) - for { - x := v_0 + // match: (VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRADMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr 
mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPEXPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRADMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint8x16 x mask) - // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM mask)) + // match: (VPSRADMasked256 x (MOVQconst [c]) mask) + // result: (VPSRADMasked256const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint8x32 x mask) - // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM mask)) + // match: (VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off 
:= auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - v.reset(OpAMD64VPEXPANDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRADMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ExpandUint8x64 x mask) - // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM mask)) + // match: (VPSRADMasked512 x (MOVQconst [c]) mask) + // result: (VPSRADMasked512const [uint8(c)] x mask) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPEXPANDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpFMA(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FMA x y z) - // result: (VFMADD231SD z x y) + // match: (VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - z := v_2 - v.reset(OpAMD64VFMADD231SD) - v.AddArg3(z, x, y) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRADMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpFloor(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Floor x) - // result: (ROUNDSD [1] x) + // match: (VPSRAQ128 x (MOVQconst [c])) + // result: (VPSRAQ128const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool { v_0 := v.Args[0] - // match: (FloorFloat32x4 x) - // result: (VROUNDPS128 [1] x) + // match: (VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQ128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorFloat32x8 x) - // result: (VROUNDPS256 [1] x) + // match: (VPSRAQ256 x (MOVQconst [c])) + // 
result: (VPSRAQ256const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool { v_0 := v.Args[0] - // match: (FloorFloat64x2 x) - // result: (VROUNDPD128 [1] x) + // match: (VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQ256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQ256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorFloat64x4 x) - // result: (VROUNDPD256 [1] x) + // match: (VPSRAQ512 x (MOVQconst [c])) + // result: (VPSRAQ512const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool { v_0 := v.Args[0] - // match: (FloorScaledFloat32x16 [a] x) - // result: 
(VRNDSCALEPS512 [a+1] x) + // match: (VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQ512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+1] x) + // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked128const [uint8(c)] x mask) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+1] x) + // match: (VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - a := 
auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+1] x) + // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked256const [uint8(c)] x mask) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+1] x) + // match: (VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+1] x) + // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked512const [uint8(c)] x mask) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledResidueFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+1] x) + // match: (VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQMasked512constload) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVD512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledResidueFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+1] x) + // match: (VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVD512load {sym} [off] x ptr mem) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledResidueFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+1] x) + // match: (VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked128load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } 
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledResidueFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+1] x) + // match: (VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked256load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledResidueFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+1] x) + // match: (VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked512load {sym} [off] x ptr mask mem) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) 
bool { +func rewriteValueAMD64_OpAMD64VPSRAVQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (FloorScaledResidueFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+1] x) + // match: (VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ128load {sym} [off] x ptr mem) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = uint8ToAuxInt(a + 1) - v.AddArg(x) - return true - } -} -func rewriteValueAMD64_OpGetG(v *Value) bool { - v_0 := v.Args[0] - // match: (GetG mem) - // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal - // result: (LoweredGetG mem) - for { - mem := v_0 - if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64LoweredGetG) - v.AddArg(mem) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiFloat32x16 x) - // result: (VEXTRACTF64X4256 [1] x) + // match: (VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ256load {sym} [off] x ptr mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTF64X4256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func 
rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiFloat32x8 x) - // result: (VEXTRACTF128128 [1] x) + // match: (VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ512load {sym} [off] x ptr mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTF128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiFloat64x4 x) - // result: (VEXTRACTF128128 [1] x) + // match: (VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTF128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiFloat64x8 x) - // result: (VEXTRACTF64X4256 [1] x) + // match: 
(VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTF64X4256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt16x16 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt16x32 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRAW128 x (MOVQconst [c])) + // result: (VPSRAW128const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := 
auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt32x16 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRAW256 x (MOVQconst [c])) + // result: (VPSRAW256const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt32x8 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRAW512 x (MOVQconst [c])) + // result: (VPSRAW512const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt64x4 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked128const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetHiInt64x8(v *Value) 
bool { +func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt64x8 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked256const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt8x32 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked512const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiInt8x64 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRLD128 x (MOVQconst [c])) + // result: (VPSRLD128const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool { + v_1 := v.Args[1] v_0 := 
v.Args[0] - // match: (GetHiUint16x16 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRLD256 x (MOVQconst [c])) + // result: (VPSRLD256const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiUint16x32 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRLD512 x (MOVQconst [c])) + // result: (VPSRLD512const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { v_0 := v.Args[0] - // match: (GetHiUint32x16 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLD512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLD512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool { 
+func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiUint32x8 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked128const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiUint64x4 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLDMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiUint64x8 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked256const [uint8(c)] x mask) for { x := v_0 - 
v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiUint8x32 x) - // result: (VEXTRACTI128128 [1] x) + // match: (VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLDMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetHiUint8x64 x) - // result: (VEXTRACTI64X4256 [1] x) + // match: (VPSRLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked512const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func 
rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoFloat32x16 x) - // result: (VEXTRACTF64X4256 [0] x) + // match: (VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTF64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLDMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoFloat32x8 x) - // result: (VEXTRACTF128128 [0] x) + // match: (VPSRLQ128 x (MOVQconst [c])) + // result: (VPSRLQ128const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTF128128) - v.AuxInt = uint8ToAuxInt(0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoFloat64x4 x) - // result: (VEXTRACTF128128 [0] x) + // match: (VPSRLQ256 x (MOVQconst [c])) + // result: (VPSRLQ256const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTF128128) - v.AuxInt = uint8ToAuxInt(0) + if v_1.Op != OpAMD64MOVQconst { + break 
+ } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoFloat64x8 x) - // result: (VEXTRACTF64X4256 [0] x) + // match: (VPSRLQ512 x (MOVQconst [c])) + // result: (VPSRLQ512const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTF64X4256) - v.AuxInt = uint8ToAuxInt(0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { v_0 := v.Args[0] - // match: (GetLoInt16x16 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQ512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt16x32 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked128const [uint8(c)] x 
mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt32x16 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt32x8 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked256const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func 
rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt64x4 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt64x8 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked512const [uint8(c)] x mask) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt8x32 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVD512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoInt8x64 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVD512load {sym} [off] x ptr mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint16x16 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - 
v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint16x32 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint32x16 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + 
if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint32x8 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQ512load {sym} [off] x ptr mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint64x4 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool { 
+func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint64x8 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint8x32 x) - // result: (VEXTRACTI128128 [0] x) + // match: (VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg(x) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GetLoUint8x64 x) - // result: (VEXTRACTI64X4256 [0] x) + // match: (VPSRLW128 x 
(MOVQconst [c])) + // result: (VPSRLW128const [uint8(c)] x) for { x := v_0 - v.reset(OpAMD64VEXTRACTI64X4256) - v.AuxInt = uint8ToAuxInt(0) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y)) + // match: (VPSRLW256 x (MOVQconst [c])) + // result: (VPSRLW256const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterEqualFloat32x4 x y) - // result: (VCMPPS128 [13] x y) + // match: (VPSRLW512 x (MOVQconst [c])) + // result: (VPSRLW512const [uint8(c)] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(13) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterEqualFloat32x8 x y) - // result: (VCMPPS256 [13] x y) + // match: 
(VPSRLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked128const [uint8(c)] x mask) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(13) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterEqualFloat64x2 x y) - // result: (VCMPPD128 [13] x y) + // match: (VPSRLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked256const [uint8(c)] x mask) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(13) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterEqualFloat64x4 x y) - // result: (VCMPPD256 [13] x y) + // match: (VPSRLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked512const [uint8(c)] x mask) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(13) - v.AddArg2(x, y) + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // 
match: (GreaterEqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) + // match: (VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBD512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) + // match: (VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := 
v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) + // match: (VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) + // match: (VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func 
rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) + // match: (VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQ512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBQMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) + // match: (VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, 
ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBQMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) + // match: (VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) + // match: (VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && 
clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) + // match: (VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPTERNLOGD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y)) + // match: (VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = 
uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPTERNLOGD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterFloat32x4 x y) - // result: (VCMPPS128 [14] x y) + // match: (VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(14) - v.AddArg2(x, y) + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPTERNLOGD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterFloat32x8 x y) - // result: (VCMPPS256 [14] x y) + // match: (VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(14) - 
v.AddArg2(x, y) + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPTERNLOGQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterFloat64x2 x y) - // result: (VCMPPD128 [14] x y) + // match: (VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(14) - v.AddArg2(x, y) + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPTERNLOGQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterFloat64x4 x y) - // result: (VCMPPD256 [14] x y) + // match: (VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(uint8(c)),off)] x y ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(14) - v.AddArg2(x, y) - return true - } -} 
-func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool { + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPTERNLOGQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y)) + // match: (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHDQ512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y)) + // match: (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHQDQ512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask) - v0.AddArg2(x, y) - 
v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHQDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y)) + // match: (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLDQ512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y)) + // match: (VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLQDQ512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } 
+ off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLQDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y)) + // match: (VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORD512load {sym} [off] x ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) + // match: (VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked128load {sym} [off] x ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - 
v0.AddArg2(x, y) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) + // match: (VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked256load {sym} [off] x ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) + // 
match: (VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) + // match: (VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQ512load {sym} [off] x ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool { - b := 
v.Block - typ := &b.Func.Config.Types - // match: (HasCPUFeature {s}) - // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s}))) +func rewriteValueAMD64_OpAMD64VPXORQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked128load {sym} [off] x ptr mask mem) for { - s := auxToSym(v.Aux) - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64) - v1.Aux = symToAux(s) - v0.AddArg(v1) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpIsInBounds(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORQMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (IsInBounds idx len) - // result: (SETB (CMPQ idx len)) + // match: (VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked256load {sym} [off] x ptr mask mem) for { - idx := v_0 - len := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(idx, len) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNaNFloat32x16 x) - // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) + // match: (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, x) - v.AddArg(v0) - return true + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break } + return false } -func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PD128(v *Value) bool { v_0 := v.Args[0] - // match: (IsNaNFloat32x4 x) - // result: (VCMPPS128 [3] x x) + // match: (VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD128load {sym} [off] ptr mem) for { - x := v_0 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, x) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PD256(v *Value) bool { v_0 := v.Args[0] - // match: (IsNaNFloat32x8 x) - // result: (VCMPPS256 [3] x x) + // match: (VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD256load {sym} [off] ptr mem) for { - x := v_0 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, x) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PD512(v *Value) bool { v_0 := v.Args[0] - // match: (IsNaNFloat64x2 x) - // result: (VCMPPD128 [3] x x) - for { - x := v_0 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, x) - return true - } -} -func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool { - v_0 := v.Args[0] - // match: (IsNaNFloat64x4 x) - // result: (VCMPPD256 [3] x x) + // match: (VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD512load {sym} [off] ptr mem) for { - x := v_0 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg2(x, x) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PD512load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNaNFloat64x8 x) - // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) + // match: (VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked128load {sym} [off] ptr mask mem) for { - x := v_0 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(3) - v0.AddArg2(x, x) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpIsNonNil(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PDMasked256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (IsNonNil p) - // result: (SETNE (TESTQ p p)) + // match: (VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked256load {sym} [off] ptr mask mem) for { - p := v_0 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags) - v0.AddArg2(p, p) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, 
mask, mem) return true } + return false } -func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (IsSliceInBounds idx len) - // result: (SETBE (CMPQ idx len)) + // match: (VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked512load {sym} [off] ptr mask mem) for { - idx := v_0 - len := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(idx, len) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpIsZeroVec(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PS512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (IsZeroVec x) - // result: (SETEQ (VPTEST x x)) + // match: (VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PS512load {sym} [off] ptr mem) for { - x := v_0 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(x, x) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLeq16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := 
v.Args[0] - b := v.Block - // match: (Leq16 x y) - // result: (SETLE (CMPW x y)) + // match: (VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked128load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLeq16U(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Leq16U x y) - // result: (SETBE (CMPW x y)) + // match: (VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked256load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLeq32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Leq32 x y) - // result: (SETLE (CMPL x y)) + // match: (VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr 
mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked512load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLeq32F(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Leq32F x y) - // result: (SETGEF (UCOMISS y x)) + // match: (VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETGEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLeq32U(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Leq32U x y) - // result: (SETBE (CMPL x y)) + // match: (VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPD256load {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLeq64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Leq64 x y) - // result: (SETLE (CMPQ x y)) + // match: (VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLeq64F(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Leq64F x y) - // result: (SETGEF (UCOMISD y x)) + // match: (VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPDMasked128load {sym} 
[makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETGEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLeq64U(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Leq64U x y) - // result: (SETBE (CMPQ x y)) + // match: (VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLeq8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Leq8 x y) - // result: (SETLE (CMPB x y)) + // match: (VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: 
canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLeq8U(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Leq8U x y) - // result: (SETBE (CMPB x y)) + // match: (VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPS128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLess16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Less16 x y) - // result: (SETL (CMPW x y)) + // match: (VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: 
canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPS256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLess16U(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Less16U x y) - // result: (SETB (CMPW x y)) + // match: (VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLess32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Less32 x y) - // result: (SETL (CMPL x y)) + // match: (VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPSMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLess32F(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Less32F x y) - // result: (SETGF (UCOMISS y x)) + // match: (VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETGF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPSMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLess32U(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Less32U x y) - // result: (SETB (CMPL x y)) + // match: (VREDUCEPSMasked512 [c] 
l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VREDUCEPSMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLess64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Less64 x y) - // result: (SETL (CMPQ x y)) + // match: (VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLess64F(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Less64F x y) - // result: (SETGF (UCOMISD y x)) + // match: 
(VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETGF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLess64U(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Less64U x y) - // result: (SETB (CMPQ x y)) + // match: (VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLess8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Less8 x y) - // result: (SETL (CMPB x y)) + // match: 
(VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLess8U(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Less8U x y) - // result: (SETB (CMPB x y)) + // match: (VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := 
v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y)) + // match: (VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool { v_0 := v.Args[0] - // match: (LessEqualFloat32x4 x y) - // result: (VCMPPS128 [2] x y) + // match: (VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg2(x, y) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPS128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func 
rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool { v_0 := v.Args[0] - // match: (LessEqualFloat32x8 x y) - // result: (VCMPPS256 [2] x y) + // match: (VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg2(x, y) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPS256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool { v_0 := v.Args[0] - // match: (LessEqualFloat64x2 x y) - // result: (VCMPPD128 [2] x y) + // match: (VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg2(x, y) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessEqualFloat64x4 x y) - // result: (VCMPPD256 [2] x y) + // match: (VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(2) - v.AddArg2(x, y) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPSMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) + // match: (VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPSMasked256load) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) + // match: (VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(uint8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRNDSCALEPSMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRSQRT14PD128(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) + // match: (VRSQRT14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PD128load {sym} [off] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRSQRT14PD256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) + // match: (VRSQRT14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PD256load {sym} [off] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRSQRT14PD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) + // match: (VRSQRT14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PD512load {sym} [off] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) + // match: (VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PDMasked128load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) + // match: (VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PDMasked256load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - 
v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) + // match: (VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PDMasked512load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRSQRT14PS512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) + // match: (VRSQRT14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PS512load {sym} [off] ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) + // match: (VRSQRT14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PSMasked128load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat32x4 x y) - // result: (VCMPPS128 [1] x y) + // match: (VRSQRT14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PSMasked256load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(1) - 
v.AddArg2(x, y) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat32x8 x y) - // result: (VCMPPS256 [1] x y) + // match: (VRSQRT14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PSMasked512load {sym} [off] ptr mask mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat64x2 x y) - // result: (VCMPPD128 [1] x y) + // match: (VSCALEFPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPD128load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VSCALEFPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat64x4 x y) - // result: (VCMPPD256 [1] x y) + // match: (VSCALEFPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPD256load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) + // match: (VSCALEFPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPD512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func 
rewriteValueAMD64_OpLessInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) + // match: (VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) + // match: (VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPDMasked256load) + 
v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) + // match: (VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPS128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) + // match: (VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPS128load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + 
} + v.reset(OpAMD64VSCALEFPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPS256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) + // match: (VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPS256load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPS512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) + // match: (VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPS512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VSCALEFPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } + return false } -func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPSMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) + // match: (VSCALEFPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPSMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLessUint8x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPSMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) + // match: (VSCALEFPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPSMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem 
:= l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } + return false } -func rewriteValueAMD64_OpLoad(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPSMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Load ptr mem) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (MOVQload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64MOVQload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is32BitInt(t) - // result: (MOVLload ptr mem) + // match: (VSCALEFPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPSMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is32BitInt(t)) { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64MOVLload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is16BitInt(t) - // result: (MOVWload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is16BitInt(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64MOVWload) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VSCALEFPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (Load ptr mem) - // cond: (t.IsBoolean() || is8BitInt(t)) - // result: (MOVBload ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64VSHUFPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VSHUFPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VSHUFPD512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.IsBoolean() || is8BitInt(t)) { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64MOVBload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is32BitFloat(t) - // result: (MOVSSload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is32BitFloat(t)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64MOVSSload) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VSHUFPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (Load ptr mem) - // cond: is64BitFloat(t) - // result: (MOVSDload ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64VSHUFPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VSHUFPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSHUFPS512load {sym} [makeValAndOff(int32(uint8(c)),off)] x ptr mem) for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is64BitFloat(t)) { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64MOVSDload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: t.Size() == 16 - // result: (VMOVDQUload128 ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.Size() == 16) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VMOVDQUload128) - v.AddArg2(ptr, mem) + v.reset(OpAMD64VSHUFPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(uint8(c)), off)) + v.Aux = symToAux(sym) + 
v.AddArg3(x, ptr, mem) return true } - // match: (Load ptr mem) - // cond: t.Size() == 32 - // result: (VMOVDQUload256 ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64VSQRTPD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VSQRTPD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPD512load {sym} [off] ptr mem) for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.Size() == 32) { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64VMOVDQUload256) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: t.Size() == 64 - // result: (VMOVDQUload512 ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.Size() == 64) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VMOVDQUload512) + v.reset(OpAMD64VSQRTPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpLoadMasked16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VSQRTPDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (LoadMasked16 ptr mask mem) - // cond: t.Size() == 64 - // result: (VPMASK16load512 ptr (VPMOVVec16x32ToM mask) mem) + // match: (VSQRTPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPDMasked128load {sym} [off] ptr mask mem) for { - t := v.Type - ptr := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - mem := v_2 - if !(t.Size() == 64) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMASK16load512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(ptr, v0, mem) + 
v.reset(OpAMD64VSQRTPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLoadMasked32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VSQRTPDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (LoadMasked32 ptr mask mem) - // cond: t.Size() == 16 - // result: (VPMASK32load128 ptr mask mem) + // match: (VSQRTPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPDMasked256load {sym} [off] ptr mask mem) for { - t := v.Type - ptr := v_0 - mask := v_1 - mem := v_2 - if !(t.Size() == 16) { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64VPMASK32load128) - v.AddArg3(ptr, mask, mem) - return true - } - // match: (LoadMasked32 ptr mask mem) - // cond: t.Size() == 32 - // result: (VPMASK32load256 ptr mask mem) - for { - t := v.Type - ptr := v_0 + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - mem := v_2 - if !(t.Size() == 32) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMASK32load256) + v.reset(OpAMD64VSQRTPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true } - // match: (LoadMasked32 ptr mask mem) - // cond: t.Size() == 64 - // result: (VPMASK32load512 ptr (VPMOVVec32x16ToM mask) mem) - for { - t := v.Type - ptr := v_0 - mask := v_1 - mem := v_2 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VPMASK32load512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(ptr, v0, mem) - return true - } return false } -func rewriteValueAMD64_OpLoadMasked64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VSQRTPDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (LoadMasked64 ptr mask mem) - 
// cond: t.Size() == 16 - // result: (VPMASK64load128 ptr mask mem) + // match: (VSQRTPDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPDMasked512load {sym} [off] ptr mask mem) for { - t := v.Type - ptr := v_0 - mask := v_1 - mem := v_2 - if !(t.Size() == 16) { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64VPMASK64load128) - v.AddArg3(ptr, mask, mem) - return true - } - // match: (LoadMasked64 ptr mask mem) - // cond: t.Size() == 32 - // result: (VPMASK64load256 ptr mask mem) - for { - t := v.Type - ptr := v_0 + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - mem := v_2 - if !(t.Size() == 32) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMASK64load256) + v.reset(OpAMD64VSQRTPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true } - // match: (LoadMasked64 ptr mask mem) - // cond: t.Size() == 64 - // result: (VPMASK64load512 ptr (VPMOVVec64x8ToM mask) mem) + return false +} +func rewriteValueAMD64_OpAMD64VSQRTPS512(v *Value) bool { + v_0 := v.Args[0] + // match: (VSQRTPS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPS512load {sym} [off] ptr mem) for { - t := v.Type - ptr := v_0 - mask := v_1 - mem := v_2 - if !(t.Size() == 64) { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64VPMASK64load512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(ptr, v0, mem) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpLoadMasked8(v *Value) bool { - v_2 
:= v.Args[2] +func rewriteValueAMD64_OpAMD64VSQRTPSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (LoadMasked8 ptr mask mem) - // cond: t.Size() == 64 - // result: (VPMASK8load512 ptr (VPMOVVec8x64ToM mask) mem) + // match: (VSQRTPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPSMasked128load {sym} [off] ptr mask mem) for { - t := v.Type - ptr := v_0 + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_1 - mem := v_2 - if !(t.Size() == 64) { + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPMASK8load512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(ptr, v0, mem) + v.reset(OpAMD64VSQRTPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLocalAddr(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSQRTPSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LocalAddr {sym} base mem) - // cond: t.Elem().HasPointers() - // result: (LEAQ {sym} (SPanchored base mem)) + // match: (VSQRTPSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPSMasked256load {sym} [off] ptr mask mem) for { - t := v.Type - sym := auxToSym(v.Aux) - base := v_0 - mem := v_1 - if !(t.Elem().HasPointers()) { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64LEAQ) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPSMasked256load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpSPanchored, 
typ.Uintptr) - v0.AddArg2(base, mem) - v.AddArg(v0) + v.AddArg3(ptr, mask, mem) return true } - // match: (LocalAddr {sym} base _) - // cond: !t.Elem().HasPointers() - // result: (LEAQ {sym} base) + return false +} +func rewriteValueAMD64_OpAMD64VSQRTPSMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VSQRTPSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPSMasked512load {sym} [off] ptr mask mem) for { - t := v.Type - sym := auxToSym(v.Aux) - base := v_0 - if !(!t.Elem().HasPointers()) { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64LEAQ) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPSMasked512load) + v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg(base) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh16x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (VSUBPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPD512load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if 
!(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpLsh16x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPDMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + // match: (VSUBPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPDMasked128load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh16x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPDMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x64 x y) - // cond: 
!shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: (VSUBPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPDMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh16x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPDMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (VSUBPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPDMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - 
v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh32x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPS512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (VSUBPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPS512load {sym} [off] x ptr mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh32x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpLsh32x32(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VSUBPSMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + // match: (VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPSMasked128load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh32x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh32x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPSMasked256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: (VSUBPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPSMasked256load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - v.reset(OpAMD64ANDL) - v0 := 
b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh32x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VSUBPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh32x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSUBPSMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (VSUBPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSUBPSMasked512load {sym} [off] x ptr mask mem) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh32x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64SHLL) - 
v.AddArg2(x, y) + v.reset(OpAMD64VSUBPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpLsh64x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh64x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPWconst y [64]))) + // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XADDLlock [off1+off2] {sym} val ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh64x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off2 := auxIntToInt32(v_1.AuxInt) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + v.reset(OpAMD64XADDLlock) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, ptr, mem) return true } return false } -func rewriteValueAMD64_OpLsh64x32(v *Value) bool { +func rewriteValueAMD64_OpAMD64XADDQlock(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh64x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPLconst y [64]))) + // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: 
(XADDQlock [off1+off2] {sym} val ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh64x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off2 := auxIntToInt32(v_1.AuxInt) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + v.reset(OpAMD64XADDQlock) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, ptr, mem) return true } return false } -func rewriteValueAMD64_OpLsh64x64(v *Value) bool { +func rewriteValueAMD64_OpAMD64XCHGL(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh64x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPQconst y [64]))) + // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XCHGL [off1+off2] {sym} val ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + off2 := auxIntToInt32(v_1.AuxInt) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + 
int64(off2))) { + break + } + v.reset(OpAMD64XCHGL) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, ptr, mem) return true } - // match: (Lsh64x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) + // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB + // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) { + break + } + v.reset(OpAMD64XCHGL) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, ptr, mem) return true } return false } -func rewriteValueAMD64_OpLsh64x8(v *Value) bool { +func rewriteValueAMD64_OpAMD64XCHGQ(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh64x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPBconst y [64]))) + // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XCHGQ [off1+off2] {sym} val ptr mem) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + off2 := auxIntToInt32(v_1.AuxInt) + ptr := 
v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64XCHGQ) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, ptr, mem) return true } - // match: (Lsh64x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) + // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB + // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { break } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) { + break + } + v.reset(OpAMD64XCHGQ) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, ptr, mem) return true } return false } -func rewriteValueAMD64_OpLsh8x16(v *Value) bool { +func rewriteValueAMD64_OpAMD64XORL(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Lsh8x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (XORL (SHLL (MOVLconst [1]) y) x) + // result: (BTCL x y) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLL { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTCL) + v.AddArg2(x, y) + return true } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, 
types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true + break } - // match: (Lsh8x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) + // match: (XORL x (MOVLconst [c])) + // result: (XORLconst [c] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVLconst { + continue + } + c := auxIntToInt32(v_1.AuxInt) + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true + } + break + } + // match: (XORL x x) + // result: (MOVLconst [0]) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + if x != v_1 { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } + // match: (XORL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (XORLload x [off] {sym} ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVLload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(OpAMD64XORLload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + // match: (XORL x (ADDLconst [-1] x)) + // cond: buildcfg.GOAMD64 >= 3 + // result: (BLSMSKL x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { + continue + } + v.reset(OpAMD64BLSMSKL) + v.AddArg(x) + return true + } + break + } return false } -func rewriteValueAMD64_OpLsh8x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh8x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL 
(SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + // match: (XORLconst [1] (SETNE x)) + // result: (SETEQ x) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + x := v_0.Args[0] + v.reset(OpAMD64SETEQ) + v.AddArg(x) return true } - // match: (Lsh8x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) + // match: (XORLconst [1] (SETEQ x)) + // result: (SETNE x) for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + x := v_0.Args[0] + v.reset(OpAMD64SETNE) + v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpLsh8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Lsh8x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: (XORLconst [1] (SETL x)) + // result: (SETGE x) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + x := v_0.Args[0] + v.reset(OpAMD64SETGE) + v.AddArg(x) return true } - // match: (Lsh8x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) + // match: (XORLconst [1] (SETGE x)) + // result: (SETL x) for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + if 
auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + x := v_0.Args[0] + v.reset(OpAMD64SETL) + v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpLsh8x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Lsh8x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (XORLconst [1] (SETLE x)) + // result: (SETG x) for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + x := v_0.Args[0] + v.reset(OpAMD64SETG) + v.AddArg(x) return true } - // match: (Lsh8x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) + // match: (XORLconst [1] (SETG x)) + // result: (SETLE x) for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG { break } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + x := v_0.Args[0] + v.reset(OpAMD64SETLE) + v.AddArg(x) return true } - return false -} -func rewriteValueAMD64_OpMax32F(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Max32F x y) - // result: (Neg32F (Min32F (Neg32F x) (Neg32F y))) + // match: (XORLconst [1] (SETB x)) + // result: (SETAE x) for { - t := v.Type - x := v_0 - y := v_1 - v.reset(OpNeg32F) - v.Type = t - v0 := b.NewValue0(v.Pos, OpMin32F, t) - v1 := b.NewValue0(v.Pos, OpNeg32F, t) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpNeg32F, t) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB { + break + } + x := v_0.Args[0] + v.reset(OpAMD64SETAE) + v.AddArg(x) 
return true } -} -func rewriteValueAMD64_OpMax64F(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Max64F x y) - // result: (Neg64F (Min64F (Neg64F x) (Neg64F y))) + // match: (XORLconst [1] (SETAE x)) + // result: (SETB x) for { - t := v.Type - x := v_0 - y := v_1 - v.reset(OpNeg64F) - v.Type = t - v0 := b.NewValue0(v.Pos, OpMin64F, t) - v1 := b.NewValue0(v.Pos, OpNeg64F, t) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpNeg64F, t) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE { + break + } + x := v_0.Args[0] + v.reset(OpAMD64SETB) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpMin32F(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Min32F x y) - // result: (POR (MINSS (MINSS x y) x) (MINSS x y)) + // match: (XORLconst [1] (SETBE x)) + // result: (SETA x) for { - t := v.Type - x := v_0 - y := v_1 - v.reset(OpAMD64POR) - v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t) - v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t) - v1.AddArg2(x, y) - v0.AddArg2(v1, x) - v.AddArg2(v0, v1) + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE { + break + } + x := v_0.Args[0] + v.reset(OpAMD64SETA) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpMin64F(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Min64F x y) - // result: (POR (MINSD (MINSD x y) x) (MINSD x y)) + // match: (XORLconst [1] (SETA x)) + // result: (SETBE x) for { - t := v.Type - x := v_0 - y := v_1 - v.reset(OpAMD64POR) - v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t) - v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t) - v1.AddArg2(x, y) - v0.AddArg2(v1, x) - v.AddArg2(v0, v1) + if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA { + break + } + x := v_0.Args[0] + v.reset(OpAMD64SETBE) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpMod16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - 
// match: (Mod16 [a] x y) - // result: (Select1 (DIVW [a] x y)) + // match: (XORLconst [c] (XORLconst [d] x)) + // result: (XORLconst [c ^ d] x) for { - a := auxIntToBool(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64XORLconst { + break + } + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(c ^ d) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpMod16u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod16u x y) - // result: (Select1 (DIVWU x y)) + // match: (XORLconst [0] x) + // result: x for { + if auxIntToInt32(v.AuxInt) != 0 { + break + } x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v0.AddArg2(x, y) - v.AddArg(v0) + v.copyOf(x) return true } -} -func rewriteValueAMD64_OpMod32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod32 [a] x y) - // result: (Select1 (DIVL [a] x y)) + // match: (XORLconst [c] (MOVLconst [d])) + // result: (MOVLconst [c^d]) for { - a := auxIntToBool(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVLconst { + break + } + d := auxIntToInt32(v_0.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(c ^ d) return true } + return false } -func rewriteValueAMD64_OpMod32u(v *Value) bool { +func rewriteValueAMD64_OpAMD64XORLconstmodify(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod32u x y) - // result: 
(Select1 (DIVLU x y)) + // match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) - v0.AddArg2(x, y) - v.AddArg(v0) + valoff1 := auxIntToValAndOff(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2)) { + break + } + v.reset(OpAMD64XORLconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(base, mem) return true } -} -func rewriteValueAMD64_OpMod64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod64 [a] x y) - // result: (Select1 (DIVQ [a] x y)) + // match: (XORLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - a := auxIntToBool(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + valoff1 := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64XORLconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } + return false } -func rewriteValueAMD64_OpMod64u(v *Value) 
bool { +func rewriteValueAMD64_OpAMD64XORLload(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Mod64u x y) - // result: (Select1 (DIVQU x y)) + // match: (XORLload [off1] {sym} val (ADDQconst [off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XORLload [off1+off2] {sym} val base mem) for { - x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) - v0.AddArg2(x, y) - v.AddArg(v0) + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64XORLload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, base, mem) return true } -} -func rewriteValueAMD64_OpMod8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod8 x y) - // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y))) + // match: (XORLload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (XORLload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { + break + } + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64XORLload) + v.AuxInt = 
int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } -} -func rewriteValueAMD64_OpMod8u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod8u x y) - // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) + // match: (XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) + // result: (XORL x (MOVLf2i y)) for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) x := v_0 - y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + ptr := v_1 + if v_2.Op != OpAMD64MOVSSstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { + break + } + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64XORL) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVLf2i, typ.UInt32) + v0.AddArg(y) + v.AddArg2(x, v0) return true } + return false } -func rewriteValueAMD64_OpMove(v *Value) bool { +func rewriteValueAMD64_OpAMD64XORLmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Move [0] _ _ mem) - // result: mem + // match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XORLmodify [off1+off2] {sym} base val mem) for { - if auxIntToInt64(v.AuxInt) != 0 { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { break } + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 mem := v_2 - v.copyOf(mem) - return true - } - // match: (Move [1] dst src mem) - // result: (MOVBstore dst (MOVBload src mem) mem) - for { - if auxIntToInt64(v.AuxInt) != 1 { + if !(is32Bit(int64(off1) + int64(off2))) { break } - 
dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) + v.reset(OpAMD64XORLmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (Move [2] dst src mem) - // result: (MOVWstore dst (MOVWload src mem) mem) + // match: (XORLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (XORLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - if auxIntToInt64(v.AuxInt) != 2 { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - dst := v_0 - src := v_1 + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 - v.reset(OpAMD64MOVWstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64XORLmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } - // match: (Move [4] dst src mem) - // result: (MOVLstore dst (MOVLload src mem) mem) + return false +} +func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (XORQ (SHLQ (MOVQconst [1]) y) x) + // result: (BTCQ x y) for { - if auxIntToInt64(v.AuxInt) != 4 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64SHLQ { + continue + } + y := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 1 { + continue + } + x := v_1 + v.reset(OpAMD64BTCQ) + v.AddArg2(x, y) + return true } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - 
v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true + break } - // match: (Move [8] dst src mem) - // result: (MOVQstore dst (MOVQload src mem) mem) + // match: (XORQ (MOVQconst [c]) x) + // cond: isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31 + // result: (BTCQconst [int8(log64u(uint64(c)))] x) for { - if auxIntToInt64(v.AuxInt) != 8 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0.AuxInt) + x := v_1 + if !(isPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31) { + continue + } + v.reset(OpAMD64BTCQconst) + v.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v.AddArg(x) + return true } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVQstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true + break } - // match: (Move [16] dst src mem) - // result: (MOVOstore dst (MOVOload src mem) mem) + // match: (XORQ x (MOVQconst [c])) + // cond: is32Bit(c) + // result: (XORQconst [int32(c)] x) for { - if auxIntToInt64(v.AuxInt) != 16 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + if !(is32Bit(c)) { + continue + } + v.reset(OpAMD64XORQconst) + v.AuxInt = int32ToAuxInt(int32(c)) + v.AddArg(x) + return true } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVOstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true + break } - // match: (Move [3] dst src mem) - // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem)) + // match: (XORQ x x) + // result: (MOVLconst [0]) for { - if auxIntToInt64(v.AuxInt) != 3 { + x := v_0 + if x != v_1 { break } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(2) - v0 := b.NewValue0(v.Pos, 
OpAMD64MOVBload, typ.UInt8) - v0.AuxInt = int32ToAuxInt(2) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(0) return true } - // match: (Move [5] dst src mem) - // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) + // match: (XORQ x l:(MOVQload [off] {sym} ptr mem)) + // cond: canMergeLoadClobber(v, l, x) && clobber(l) + // result: (XORQload x [off] {sym} ptr mem) for { - if auxIntToInt64(v.AuxInt) != 5 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64MOVQload { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoadClobber(v, l, x) && clobber(l)) { + continue + } + v.reset(OpAMD64XORQload) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(4) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) - v0.AuxInt = int32ToAuxInt(4) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) - return true + break } - // match: (Move [6] dst src mem) - // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) + // match: (XORQ x (ADDQconst [-1] x)) + // cond: buildcfg.GOAMD64 >= 3 + // result: (BLSMSKQ x) for { - if auxIntToInt64(v.AuxInt) != 6 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { + continue + } + 
v.reset(OpAMD64BLSMSKQ) + v.AddArg(x) + return true } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(4) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v0.AuxInt = int32ToAuxInt(4) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) - return true + break } - // match: (Move [7] dst src mem) - // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) + return false +} +func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool { + v_0 := v.Args[0] + // match: (XORQconst [c] (XORQconst [d] x)) + // result: (XORQconst [c ^ d] x) for { - if auxIntToInt64(v.AuxInt) != 7 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64XORQconst { break } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(3) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(3) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + d := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + v.reset(OpAMD64XORQconst) + v.AuxInt = int32ToAuxInt(c ^ d) + v.AddArg(x) return true } - // match: (Move [9] dst src mem) - // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) + // match: (XORQconst [0] x) + // result: x for { - if auxIntToInt64(v.AuxInt) != 9 { + if auxIntToInt32(v.AuxInt) != 0 { break } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(8) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) - v0.AuxInt = int32ToAuxInt(8) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, 
OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + x := v_0 + v.copyOf(x) return true } - // match: (Move [10] dst src mem) - // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) + // match: (XORQconst [c] (MOVQconst [d])) + // result: (MOVQconst [int64(c)^d]) for { - if auxIntToInt64(v.AuxInt) != 10 { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpAMD64MOVQconst { break } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(8) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v0.AuxInt = int32ToAuxInt(8) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + d := auxIntToInt64(v_0.AuxInt) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(int64(c) ^ d) return true } - // match: (Move [11] dst src mem) - // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem)) + return false +} +func rewriteValueAMD64_OpAMD64XORQconstmodify(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (XORQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) + // result: (XORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) for { - if auxIntToInt64(v.AuxInt) != 11 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(7) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(7) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + valoff1 := auxIntToValAndOff(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { + 
break + } + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2)) { + break + } + v.reset(OpAMD64XORQconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(base, mem) return true } - // match: (Move [12] dst src mem) - // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) + // match: (XORQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) + // result: (XORQconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) for { - if auxIntToInt64(v.AuxInt) != 12 { + valoff1 := auxIntToValAndOff(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { break } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(8) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(8) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + mem := v_1 + if !(ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64XORQconstmodify) + v.AuxInt = valAndOffToAuxInt(ValAndOff(valoff1).addOffset32(off2)) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(base, mem) return true } - // match: (Move [s] dst src mem) - // cond: s >= 13 && s <= 15 - // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) + return false +} +func rewriteValueAMD64_OpAMD64XORQload(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (XORQload [off1] {sym} val (ADDQconst 
[off2] base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XORQload [off1+off2] {sym} val base mem) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + off2 := auxIntToInt32(v_1.AuxInt) + base := v_1.Args[0] mem := v_2 - if !(s >= 13 && s <= 15) { + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpAMD64MOVQstore) - v.AuxInt = int32ToAuxInt(int32(s - 8)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AuxInt = int32ToAuxInt(int32(s - 8)) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + v.reset(OpAMD64XORQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, base, mem) return true } - // match: (Move [s] dst src mem) - // cond: s > 16 && s < 192 && logLargeCopy(v, s) - // result: (LoweredMove [s] dst src mem) + // match: (XORQload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (XORQload [off1+off2] {mergeSym(sym1,sym2)} val base mem) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64LEAQ { + break + } + off2 := auxIntToInt32(v_1.AuxInt) + sym2 := auxToSym(v_1.Aux) + base := v_1.Args[0] mem := v_2 - if !(s > 16 && s < 192 && logLargeCopy(v, s)) { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64LoweredMove) - v.AuxInt = int64ToAuxInt(s) - v.AddArg3(dst, src, mem) + v.reset(OpAMD64XORQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(val, base, mem) return true } - // match: (Move [s] dst src mem) - // cond: s >= 192 
&& s <= repMoveThreshold && logLargeCopy(v, s) - // result: (LoweredMoveLoop [s] dst src mem) + // match: (XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) + // result: (XORQ x (MOVQf2i y)) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + x := v_0 + ptr := v_1 + if v_2.Op != OpAMD64MOVSDstore || auxIntToInt32(v_2.AuxInt) != off || auxToSym(v_2.Aux) != sym { break } - v.reset(OpAMD64LoweredMoveLoop) - v.AuxInt = int64ToAuxInt(s) - v.AddArg3(dst, src, mem) + y := v_2.Args[1] + if ptr != v_2.Args[0] { + break + } + v.reset(OpAMD64XORQ) + v0 := b.NewValue0(v_2.Pos, OpAMD64MOVQf2i, typ.UInt64) + v0.AddArg(y) + v.AddArg2(x, v0) return true } - // match: (Move [s] dst src mem) - // cond: s > repMoveThreshold && s%8 != 0 - // result: (Move [s-s%8] (OffPtr dst [s%8]) (OffPtr src [s%8]) (MOVQstore dst (MOVQload src mem) mem)) + return false +} +func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XORQmodify [off1+off2] {sym} base val mem) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpAMD64ADDQconst { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + base := v_0.Args[0] + val := v_1 mem := v_2 - if !(s > repMoveThreshold && s%8 != 0) { + if !(is32Bit(int64(off1) + int64(off2))) { break } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(s - s%8) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(s % 8) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) - v1.AuxInt = int64ToAuxInt(s % 8) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, 
typ.UInt64) - v3.AddArg2(src, mem) - v2.AddArg3(dst, v3, mem) - v.AddArg3(v0, v1, v2) + v.reset(OpAMD64XORQmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(base, val, mem) return true } - // match: (Move [s] dst src mem) - // cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s) - // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem) + // match: (XORQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) + // result: (XORQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + base := v_0.Args[0] + val := v_1 mem := v_2 - if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) { + if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64REPMOVSQ) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(s / 8) - v.AddArg4(dst, src, v0, mem) + v.reset(OpAMD64XORQmodify) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(base, val, mem) return true } return false } -func rewriteValueAMD64_OpNeg32F(v *Value) bool { +func rewriteValueAMD64_OpAddr(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Neg32F x) - // result: (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) + // match: (Addr {sym} base) + // result: (LEAQ {sym} base) for { - x := v_0 - v.reset(OpAMD64PXOR) - v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32) - v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1))) - v.AddArg2(x, v0) + sym := auxToSym(v.Aux) + base := v_0 + v.reset(OpAMD64LEAQ) + v.Aux = symToAux(sym) + v.AddArg(base) return true } } -func rewriteValueAMD64_OpNeg64F(v *Value) bool { +func 
rewriteValueAMD64_OpAtomicAdd32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Neg64F x) - // result: (PXOR x (MOVSDconst [math.Copysign(0, -1)])) + // match: (AtomicAdd32 ptr val mem) + // result: (AddTupleFirst32 val (XADDLlock val ptr mem)) for { - x := v_0 - v.reset(OpAMD64PXOR) - v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64) - v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1)) - v.AddArg2(x, v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64AddTupleFirst32) + v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg2(val, v0) return true } } -func rewriteValueAMD64_OpNeq16(v *Value) bool { +func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Neq16 x y) - // result: (SETNE (CMPW x y)) + typ := &b.Func.Config.Types + // match: (AtomicAdd64 ptr val mem) + // result: (AddTupleFirst64 val (XADDQlock val ptr mem)) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64AddTupleFirst64) + v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg2(val, v0) return true } } -func rewriteValueAMD64_OpNeq32(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq32 x y) - // result: (SETNE (CMPL x y)) + // match: (AtomicAnd32 ptr val mem) + // result: (ANDLlock ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ANDLlock) + v.AddArg3(ptr, val, mem) return true } } 
-func rewriteValueAMD64_OpNeq32F(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq32F x y) - // result: (SETNEF (UCOMISS x y)) + // match: (AtomicAnd32value ptr val mem) + // result: (LoweredAtomicAnd32 ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicAnd32) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpNeq64(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq64 x y) - // result: (SETNE (CMPQ x y)) + // match: (AtomicAnd64value ptr val mem) + // result: (LoweredAtomicAnd64 ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicAnd64) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpNeq64F(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq64F x y) - // result: (SETNEF (UCOMISD x y)) + // match: (AtomicAnd8 ptr val mem) + // result: (ANDBlock ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ANDBlock) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpNeq8(v *Value) bool { +func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq8 x y) - // result: (SETNE (CMPB x y)) + 
// match: (AtomicCompareAndSwap32 ptr old new_ mem) + // result: (CMPXCHGLlock ptr old new_ mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + old := v_1 + new_ := v_2 + mem := v_3 + v.reset(OpAMD64CMPXCHGLlock) + v.AddArg4(ptr, old, new_, mem) return true } } -func rewriteValueAMD64_OpNeqB(v *Value) bool { +func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (NeqB x y) - // result: (SETNE (CMPB x y)) + // match: (AtomicCompareAndSwap64 ptr old new_ mem) + // result: (CMPXCHGQlock ptr old new_ mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + old := v_1 + new_ := v_2 + mem := v_3 + v.reset(OpAMD64CMPXCHGQlock) + v.AddArg4(ptr, old, new_, mem) return true } } -func rewriteValueAMD64_OpNeqPtr(v *Value) bool { +func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (NeqPtr x y) - // result: (SETNE (CMPQ x y)) + // match: (AtomicExchange32 ptr val mem) + // result: (XCHGL val ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64XCHGL) + v.AddArg3(val, ptr, mem) return true } } -func rewriteValueAMD64_OpNot(v *Value) bool { +func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Not x) - // result: (XORLconst [1] x) + // match: (AtomicExchange64 ptr val mem) + // result: (XCHGQ val ptr mem) for { - x := v_0 - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(1) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + 
v.reset(OpAMD64XCHGQ) + v.AddArg3(val, ptr, mem) return true } } -func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) + // match: (AtomicExchange8 ptr val mem) + // result: (XCHGB val ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64XCHGB) + v.AddArg3(val, ptr, mem) return true } } -func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat32x4 x y) - // result: (VCMPPS128 [4] x y) + // match: (AtomicLoad32 ptr mem) + // result: (MOVLatomicload ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = uint8ToAuxInt(4) - v.AddArg2(x, y) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVLatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat32x8 x y) - // result: (VCMPPS256 [4] x y) + // match: (AtomicLoad64 ptr mem) + // result: (MOVQatomicload ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = uint8ToAuxInt(4) - v.AddArg2(x, y) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVQatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat64x2 x y) - // result: (VCMPPD128 [4] x y) + // match: (AtomicLoad8 ptr mem) + // result: 
(MOVBatomicload ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = uint8ToAuxInt(4) - v.AddArg2(x, y) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVBatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat64x4 x y) - // result: (VCMPPD256 [4] x y) + // match: (AtomicLoadPtr ptr mem) + // result: (MOVQatomicload ptr mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = uint8ToAuxInt(4) - v.AddArg2(x, y) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVQatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) + // match: (AtomicOr32 ptr val mem) + // result: (ORLlock ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ORLlock) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) + // match: (AtomicOr32value ptr val mem) + // result: (LoweredAtomicOr32 ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + 
v.reset(OpAMD64LoweredAtomicOr32) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) + // match: (AtomicOr64value ptr val mem) + // result: (LoweredAtomicOr64 ptr val mem) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicOr64) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (AtomicOr8 ptr val mem) + // result: (ORBlock ptr val mem) + for { + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ORBlock) + v.AddArg3(ptr, val, mem) + return true + } +} +func rewriteValueAMD64_OpAtomicStore32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (NotEqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) + // match: (AtomicStore32 ptr val mem) + // result: (Select1 (XCHGL val ptr mem)) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem)) + v0.AddArg3(val, ptr, mem) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpAtomicStore64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block 
typ := &b.Func.Config.Types - // match: (NotEqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) + // match: (AtomicStore64 ptr val mem) + // result: (Select1 (XCHGQ val ptr mem)) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem)) + v0.AddArg3(val, ptr, mem) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpAtomicStore8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (NotEqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) + // match: (AtomicStore8 ptr val mem) + // result: (Select1 (XCHGB val ptr mem)) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem)) + v0.AddArg3(val, ptr, mem) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (NotEqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) + // match: (AtomicStorePtrNoWB ptr val mem) + // result: (Select1 (XCHGQ val ptr mem)) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, 
types.NewTuple(typ.BytePtr, types.TypeMem)) + v0.AddArg3(val, ptr, mem) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBitLen16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (NotEqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) + // match: (BitLen16 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSRL (LEAL1 [1] (MOVWQZX x) (MOVWQZX x))) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSRL) + v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, v1) + v.AddArg(v0) + return true + } + // match: (BitLen16 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVWQZX x)))) + for { + t := v.Type + x := v_0 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-32) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type) + v2.AddArg(x) + v1.AddArg(v2) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBitLen32(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (NotEqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) + // match: (BitLen32 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (Select0 (BSRQ (LEAQ1 [1] (MOVLQZX x) (MOVLQZX x)))) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) + if 
!(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64) + v1.AuxInt = int32ToAuxInt(1) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64) + v2.AddArg(x) + v1.AddArg2(v2, v2) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (BitLen32 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-32] (LZCNTL x))) + for { + t := v.Type + x := v_0 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-32) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) + v1.AddArg(x) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpOffPtr(v *Value) bool { +func rewriteValueAMD64_OpBitLen64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (OffPtr [off] ptr) - // cond: is32Bit(off) - // result: (ADDQconst [int32(off)] ptr) + // match: (BitLen64 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (ADDQconst [1] (CMOVQEQ (Select0 (BSRQ x)) (MOVQconst [-1]) (Select1 (BSRQ x)))) for { - off := auxIntToInt64(v.AuxInt) - ptr := v_0 - if !(is32Bit(off)) { + t := v.Type + x := v_0 + if !(buildcfg.GOAMD64 < 3) { break } v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(int32(off)) - v.AddArg(ptr) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t) + v1 := b.NewValue0(v.Pos, OpSelect0, t) + v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v2.AddArg(x) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) + v3.AuxInt = int64ToAuxInt(-1) + v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4.AddArg(v2) + v0.AddArg3(v1, v3, v4) + v.AddArg(v0) return true } - // match: (OffPtr [off] ptr) - // result: (ADDQ (MOVQconst [off]) ptr) + // match: (BitLen64 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: 
(NEGQ (ADDQconst [-64] (LZCNTQ x))) for { - off := auxIntToInt64(v.AuxInt) - ptr := v_0 - v.reset(OpAMD64ADDQ) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(off) - v.AddArg2(v0, ptr) + t := v.Type + x := v_0 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-64) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpPopCount16(v *Value) bool { +func rewriteValueAMD64_OpBitLen8(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (PopCount16 x) - // result: (POPCNTL (MOVWQZX x)) + // match: (BitLen8 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSRL (LEAL1 [1] (MOVBQZX x) (MOVBQZX x))) for { x := v_0 - v.reset(OpAMD64POPCNTL) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) - v0.AddArg(x) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSRL) + v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, v1) + v.AddArg(v0) + return true + } + // match: (BitLen8 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVBQZX x)))) + for { + t := v.Type + x := v_0 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-32) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type) + v2.AddArg(x) + v1.AddArg(v2) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpPopCount8(v *Value) bool { +func rewriteValueAMD64_OpBswap16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (PopCount8 x) - // result: (POPCNTL (MOVBQZX x)) + // match: (Bswap16 x) + // 
result: (ROLWconst [8] x) for { x := v_0 - v.reset(OpAMD64POPCNTL) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) - v0.AddArg(x) - v.AddArg(v0) + v.reset(OpAMD64ROLWconst) + v.AuxInt = int8ToAuxInt(8) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEven(v *Value) bool { +func rewriteValueAMD64_OpCeil(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEven x) - // result: (ROUNDSD [0] x) + // match: (Ceil x) + // result: (ROUNDSD [2] x) for { x := v_0 v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(0) + v.AuxInt = int8ToAuxInt(2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenFloat32x4 x) - // result: (VROUNDPS128 [0] x) + // match: (CeilFloat32x4 x) + // result: (VROUNDPS128 [2] x) for { x := v_0 v.reset(OpAMD64VROUNDPS128) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenFloat32x8 x) - // result: (VROUNDPS256 [0] x) + // match: (CeilFloat32x8 x) + // result: (VROUNDPS256 [2] x) for { x := v_0 v.reset(OpAMD64VROUNDPS256) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenFloat64x2 x) - // result: (VROUNDPD128 [0] x) + // match: (CeilFloat64x2 x) + // result: (VROUNDPD128 [2] x) for { x := v_0 v.reset(OpAMD64VROUNDPD128) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenFloat64x4 x) - // result: (VROUNDPD256 
[0] x) + // match: (CeilFloat64x4 x) + // result: (VROUNDPD256 [2] x) for { x := v_0 v.reset(OpAMD64VROUNDPD256) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+0] x) + // match: (CeilScaledFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+0] x) + // match: (CeilScaledFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+0] x) + // match: (CeilScaledFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+0] x) + // match: (CeilScaledFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+2] x) for { a := 
auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+0] x) + // match: (CeilScaledFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+0] x) + // match: (CeilScaledFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledResidueFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+0] x) + // match: (CeilScaledResidueFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledResidueFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+0] x) + // match: (CeilScaledResidueFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+2] x) for { a := 
auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledResidueFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+0] x) + // match: (CeilScaledResidueFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledResidueFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+0] x) + // match: (CeilScaledResidueFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledResidueFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+0] x) + // match: (CeilScaledResidueFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool { v_0 := v.Args[0] - // match: (RoundToEvenScaledResidueFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+0] x) + // match: (CeilScaledResidueFloat64x8 [a] x) + // result: 
(VREDUCEPD512 [a+2] x) for { a := auxIntToUint8(v.AuxInt) x := v_0 v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = uint8ToAuxInt(a + 0) + v.AuxInt = uint8ToAuxInt(a + 2) v.AddArg(x) return true } } -func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool { +func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPWconst y [16]))) + // match: (CompressFloat32x16 x mask) + // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Rsh16Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VCOMPRESSPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool { +func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPLconst y [16]))) + // match: (CompressFloat32x4 x mask) + // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, 
types.TypeFlags) - v2.AuxInt = int32ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VCOMPRESSPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Rsh16Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressFloat32x8 x mask) + // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VCOMPRESSPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool { +func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPQconst y [16]))) + // match: (CompressFloat64x2 x mask) + // result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VCOMPRESSPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Rsh16Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(CompressFloat64x4 x mask) + // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VCOMPRESSPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool { +func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPBconst y [16]))) + // match: (CompressFloat64x8 x mask) + // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VCOMPRESSPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Rsh16Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt16x16 x mask) + // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x16(v *Value) bool { +func rewriteValueAMD64_OpCompressInt16x32(v *Value) 
bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [16]))))) + // match: (CompressInt16x32 x mask) + // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh16x16 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt16x8 x mask) + // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x32(v *Value) bool { +func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [16]))))) + // match: (CompressInt32x16 x mask) + // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := 
b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh16x32 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt32x4 x mask) + // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x64(v *Value) bool { +func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [16]))))) + // match: (CompressInt32x8 x mask) + // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + 
v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh16x64 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt64x2 x mask) + // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x8(v *Value) bool { +func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [16]))))) + // match: (CompressInt64x4 x mask) + // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh16x8 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt64x8 x mask) + // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - 
v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool { +func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (CompressInt8x16 x mask) + // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Rsh32Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressInt8x32 x mask) + // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool { +func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPLconst y [32]))) + // 
match: (CompressInt8x64 x mask) + // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Rsh32Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint16x16 x mask) + // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool { +func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: (CompressUint16x32 x mask) + // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + 
v.reset(OpAMD64VPCOMPRESSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Rsh32Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint16x8 x mask) + // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool { +func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (CompressUint32x16 x mask) + // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Rsh32Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x16(v *Value) bool { +func rewriteValueAMD64_OpCompressUint32x4(v *Value) 
bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [32]))))) + // match: (CompressUint32x4 x mask) + // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh32x16 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint32x8 x mask) + // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x32(v *Value) bool { +func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [32]))))) + // match: (CompressUint64x2 x mask) + // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := 
b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh32x32 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint64x4 x mask) + // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x64(v *Value) bool { +func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [32]))))) + // match: (CompressUint64x8 x mask) + // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, 
types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh32x64 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint8x16 x mask) + // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x8(v *Value) bool { +func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [32]))))) + // match: (CompressUint8x32 x mask) + // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) v.AddArg2(x, v0) return true } - // match: (Rsh32x8 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (CompressUint8x64 x mask) + // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM mask)) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - 
v.reset(OpAMD64SARL) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VPCOMPRESSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool { +func rewriteValueAMD64_OpCondSelect(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPWconst y [64]))) + // match: (CondSelect x y (SETEQ cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQEQ y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETEQ { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Rsh64Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPLconst y [64]))) + // match: (CondSelect x y (SETNE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETNE { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, 
types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) + // match: (CondSelect x y (SETL cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQLT y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETL { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPQconst y [64]))) + // match: (CondSelect x y (SETG cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGT y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETG { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) + // match: (CondSelect x y (SETLE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQLE y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETLE { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + 
v.reset(OpAMD64CMOVQLE) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPBconst y [64]))) + // match: (CondSelect x y (SETGE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGE y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETGE { break } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) + // match: (CondSelect x y (SETA cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQHI y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETA { break } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [64]))))) + // match: (CondSelect x y (SETB cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQCS y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETB { break } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, 
y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64x16 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) + // match: (CondSelect x y (SETAE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQCC y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETAE { break } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [64]))))) + // match: (CondSelect x y (SETBE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQLS y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETBE { break } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQLS) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64x32 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) + // match: (CondSelect x y (SETEQF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: 
(CMOVQEQF y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETEQF { break } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQEQF) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [64]))))) + // match: (CondSelect x y (SETNEF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNEF y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETNEF { break } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQNEF) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64x64 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) + // match: (CondSelect x y (SETGF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGTF y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETGF { break } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGTF) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh64x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [64]))))) 
+ // match: (CondSelect x y (SETGEF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGEF y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETGEF { break } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGEF) + v.AddArg3(y, x, cond) return true } - // match: (Rsh64x8 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) + // match: (CondSelect x y (SETEQ cond)) + // cond: is32BitInt(t) + // result: (CMOVLEQ y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETEQ { break } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPWconst y [8]))) + // match: (CondSelect x y (SETNE cond)) + // cond: is32BitInt(t) + // result: (CMOVLNE y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETNE { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(y, x, 
cond) return true } - // match: (Rsh8Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) + // match: (CondSelect x y (SETL cond)) + // cond: is32BitInt(t) + // result: (CMOVLLT y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETL { break } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPLconst y [8]))) + // match: (CondSelect x y (SETG cond)) + // cond: is32BitInt(t) + // result: (CMOVLGT y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETG { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) + // match: (CondSelect x y (SETLE cond)) + // cond: is32BitInt(t) + // result: (CMOVLLE y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETLE { break } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPQconst y 
[8]))) + // match: (CondSelect x y (SETGE cond)) + // cond: is32BitInt(t) + // result: (CMOVLGE y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETGE { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) + // match: (CondSelect x y (SETA cond)) + // cond: is32BitInt(t) + // result: (CMOVLHI y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETA { break } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPBconst y [8]))) + // match: (CondSelect x y (SETB cond)) + // cond: is32BitInt(t) + // result: (CMOVLCS y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETB { break } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) + // match: 
(CondSelect x y (SETAE cond)) + // cond: is32BitInt(t) + // result: (CMOVLCC y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETAE { break } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [8]))))) + // match: (CondSelect x y (SETBE cond)) + // cond: is32BitInt(t) + // result: (CMOVLLS y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETBE { break } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8x16 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) + // match: (CondSelect x y (SETEQF cond)) + // cond: is32BitInt(t) + // result: (CMOVLEQF y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETEQF { break } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLEQF) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [8]))))) + // 
match: (CondSelect x y (SETNEF cond)) + // cond: is32BitInt(t) + // result: (CMOVLNEF y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETNEF { break } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLNEF) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8x32 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) + // match: (CondSelect x y (SETGF cond)) + // cond: is32BitInt(t) + // result: (CMOVLGTF y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETGF { break } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGTF) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [8]))))) + // match: (CondSelect x y (SETGEF cond)) + // cond: is32BitInt(t) + // result: (CMOVLGEF y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETGEF { break } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if 
!(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGEF) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8x64 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) + // match: (CondSelect x y (SETEQ cond)) + // cond: is16BitInt(t) + // result: (CMOVWEQ y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETEQ { break } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpRsh8x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Rsh8x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [8]))))) + // match: (CondSelect x y (SETNE cond)) + // cond: is16BitInt(t) + // result: (CMOVWNE y x cond) for { t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETNE { break } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(y, x, cond) return true } - // match: (Rsh8x8 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) + // match: (CondSelect x y (SETL cond)) + // cond: is16BitInt(t) + // result: (CMOVWLT y x cond) for { + t := v.Type x := v_0 y := v_1 - if !(shiftIsBounded(v)) { + if v_2.Op != OpAMD64SETL { break } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) - return true - } - return false -} -func rewriteValueAMD64_OpSelect0(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Select0 (Mul64uover x y)) - // 
result: (Select0 (MULQU x y)) - for { - if v_0.Op != OpMul64uover { + cond := v_2.Args[0] + if !(is16BitInt(t)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpSelect0) - v.Type = typ.UInt64 - v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64CMOVWLT) + v.AddArg3(y, x, cond) return true } - // match: (Select0 (Mul32uover x y)) - // result: (Select0 (MULLU x y)) + // match: (CondSelect x y (SETG cond)) + // cond: is16BitInt(t) + // result: (CMOVWGT y x cond) for { - if v_0.Op != OpMul32uover { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETG { break } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpSelect0) - v.Type = typ.UInt32 - v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) - v0.AddArg2(x, y) - v.AddArg(v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(y, x, cond) return true } - // match: (Select0 (Add64carry x y c)) - // result: (Select0 (ADCQ x y (Select1 (NEGLflags c)))) + // match: (CondSelect x y (SETLE cond)) + // cond: is16BitInt(t) + // result: (CMOVWLE y x cond) for { - if v_0.Op != OpAdd64carry { - break - } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v2.AddArg(c) - v1.AddArg(v2) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } - // match: (Select0 (Sub64borrow x y c)) - // result: (Select0 (SBBQ x y (Select1 (NEGLflags c)))) - for { - if v_0.Op != OpSub64borrow { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETLE { break } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, 
types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v2.AddArg(c) - v1.AddArg(v2) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(y, x, cond) return true } - // match: (Select0 (AddTupleFirst32 val tuple)) - // result: (ADDL val (Select0 tuple)) + // match: (CondSelect x y (SETGE cond)) + // cond: is16BitInt(t) + // result: (CMOVWGE y x cond) for { t := v.Type - if v_0.Op != OpAMD64AddTupleFirst32 { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETGE { break } - tuple := v_0.Args[1] - val := v_0.Args[0] - v.reset(OpAMD64ADDL) - v0 := b.NewValue0(v.Pos, OpSelect0, t) - v0.AddArg(tuple) - v.AddArg2(val, v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(y, x, cond) return true } - // match: (Select0 (AddTupleFirst64 val tuple)) - // result: (ADDQ val (Select0 tuple)) + // match: (CondSelect x y (SETA cond)) + // cond: is16BitInt(t) + // result: (CMOVWHI y x cond) for { t := v.Type - if v_0.Op != OpAMD64AddTupleFirst64 { + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETA { break } - tuple := v_0.Args[1] - val := v_0.Args[0] - v.reset(OpAMD64ADDQ) - v0 := b.NewValue0(v.Pos, OpSelect0, t) - v0.AddArg(tuple) - v.AddArg2(val, v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(y, x, cond) return true } - // match: (Select0 a:(ADDQconstflags [c] x)) - // cond: a.Uses == 1 - // result: (ADDQconst [c] x) + // match: (CondSelect x y (SETB cond)) + // cond: is16BitInt(t) + // result: (CMOVWCS y x cond) for { - a := v_0 - if a.Op != OpAMD64ADDQconstflags { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETB { break } - c := auxIntToInt32(a.AuxInt) - x := a.Args[0] - if !(a.Uses == 1) { + cond := v_2.Args[0] + if !(is16BitInt(t)) { break } - v.reset(OpAMD64ADDQconst) - v.AuxInt = 
int32ToAuxInt(c) - v.AddArg(x) + v.reset(OpAMD64CMOVWCS) + v.AddArg3(y, x, cond) return true } - // match: (Select0 a:(ADDLconstflags [c] x)) - // cond: a.Uses == 1 - // result: (ADDLconst [c] x) + // match: (CondSelect x y (SETAE cond)) + // cond: is16BitInt(t) + // result: (CMOVWCC y x cond) for { - a := v_0 - if a.Op != OpAMD64ADDLconstflags { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETAE { break } - c := auxIntToInt32(a.AuxInt) - x := a.Args[0] - if !(a.Uses == 1) { + cond := v_2.Args[0] + if !(is16BitInt(t)) { break } - v.reset(OpAMD64ADDLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) + v.reset(OpAMD64CMOVWCC) + v.AddArg3(y, x, cond) return true } - return false -} -func rewriteValueAMD64_OpSelect1(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Select1 (Mul64uover x y)) - // result: (SETO (Select1 (MULQU x y))) + // match: (CondSelect x y (SETBE cond)) + // cond: is16BitInt(t) + // result: (CMOVWLS y x cond) for { - if v_0.Op != OpMul64uover { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETBE { break } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpAMD64SETO) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1.AddArg2(x, y) - v0.AddArg(v1) - v.AddArg(v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(y, x, cond) return true } - // match: (Select1 (Mul32uover x y)) - // result: (SETO (Select1 (MULLU x y))) + // match: (CondSelect x y (SETEQF cond)) + // cond: is16BitInt(t) + // result: (CMOVWEQF y x cond) for { - if v_0.Op != OpMul32uover { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETEQF { break } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpAMD64SETO) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) - v1.AddArg2(x, y) - 
v0.AddArg(v1) - v.AddArg(v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWEQF) + v.AddArg3(y, x, cond) return true } - // match: (Select1 (Add64carry x y c)) - // result: (MOVBQZX (SETB (Select1 (ADCQ x y (Select1 (NEGLflags c)))))) + // match: (CondSelect x y (SETNEF cond)) + // cond: is16BitInt(t) + // result: (CMOVWNEF y x cond) for { - if v_0.Op != OpAdd64carry { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETNEF { break } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpAMD64MOVBQZX) - v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8]) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v4.AddArg(c) - v3.AddArg(v4) - v2.AddArg3(x, y, v3) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWNEF) + v.AddArg3(y, x, cond) return true } - // match: (Select1 (Sub64borrow x y c)) - // result: (MOVBQZX (SETB (Select1 (SBBQ x y (Select1 (NEGLflags c)))))) + // match: (CondSelect x y (SETGF cond)) + // cond: is16BitInt(t) + // result: (CMOVWGTF y x cond) for { - if v_0.Op != OpSub64borrow { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETGF { break } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpAMD64MOVBQZX) - v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8]) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v4.AddArg(c) - v3.AddArg(v4) - v2.AddArg3(x, y, v3) - v1.AddArg(v2) - v0.AddArg(v1) - 
v.AddArg(v0) + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGTF) + v.AddArg3(y, x, cond) return true } - // match: (Select1 (NEGLflags (MOVQconst [0]))) - // result: (FlagEQ) + // match: (CondSelect x y (SETGEF cond)) + // cond: is16BitInt(t) + // result: (CMOVWGEF y x cond) for { - if v_0.Op != OpAMD64NEGLflags { + t := v.Type + x := v_0 + y := v_1 + if v_2.Op != OpAMD64SETGEF { break } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 { + cond := v_2.Args[0] + if !(is16BitInt(t)) { break } - v.reset(OpAMD64FlagEQ) + v.reset(OpAMD64CMOVWGEF) + v.AddArg3(y, x, cond) return true } - // match: (Select1 (NEGLflags (MOVBQZX (SETB x)))) - // result: x + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x (CMPQconst [0] check)) for { - if v_0.Op != OpAMD64NEGLflags { - break - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVBQZX { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) { break } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64SETB { + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) + return true + } + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t) + // result: (CMOVLNE y x (CMPQconst [0] check)) + for { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) { break } - x := v_0_0_0.Args[0] - v.copyOf(x) + v.reset(OpAMD64CMOVLNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - // match: (Select1 (AddTupleFirst32 _ tuple)) - // result: (Select1 tuple) + // 
match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t) + // result: (CMOVWNE y x (CMPQconst [0] check)) for { - if v_0.Op != OpAMD64AddTupleFirst32 { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) { break } - tuple := v_0.Args[1] - v.reset(OpSelect1) - v.AddArg(tuple) + v.reset(OpAMD64CMOVWNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - // match: (Select1 (AddTupleFirst64 _ tuple)) - // result: (Select1 tuple) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x (CMPLconst [0] check)) for { - if v_0.Op != OpAMD64AddTupleFirst64 { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 4 && (is64BitInt(t) || isPtr(t))) { break } - tuple := v_0.Args[1] - v.reset(OpSelect1) - v.AddArg(tuple) + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ANDQlock ptr val mem) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t) + // result: (CMOVLNE y x (CMPLconst [0] check)) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicAnd64 { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 4 && is32BitInt(t)) { break } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + v.reset(OpAMD64CMOVLNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) + return 
true + } + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t) + // result: (CMOVWNE y x (CMPLconst [0] check)) + for { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 4 && is16BitInt(t)) { break } - v.reset(OpAMD64ANDQlock) - v.AddArg3(ptr, val, mem) + v.reset(OpAMD64CMOVWNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ANDLlock ptr val mem) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x (CMPWconst [0] check)) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicAnd32 { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 2 && (is64BitInt(t) || isPtr(t))) { break } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v0.AuxInt = int16ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) + return true + } + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t) + // result: (CMOVLNE y x (CMPWconst [0] check)) + for { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 2 && is32BitInt(t)) { break } - v.reset(OpAMD64ANDLlock) - v.AddArg3(ptr, val, mem) + v.reset(OpAMD64CMOVLNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v0.AuxInt = int16ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - // match: (Select1 a:(LoweredAtomicOr64 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ORQlock ptr val mem) + // match: 
(CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t) + // result: (CMOVWNE y x (CMPWconst [0] check)) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicOr64 { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 2 && is16BitInt(t)) { break } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + v.reset(OpAMD64CMOVWNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v0.AuxInt = int16ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) + return true + } + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x (CMPBconst [0] check)) + for { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 1 && (is64BitInt(t) || isPtr(t))) { break } - v.reset(OpAMD64ORQlock) - v.AddArg3(ptr, val, mem) + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - // match: (Select1 a:(LoweredAtomicOr32 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ORLlock ptr val mem) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t) + // result: (CMOVLNE y x (CMPBconst [0] check)) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicOr32 { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 1 && is32BitInt(t)) { break } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + v.reset(OpAMD64CMOVLNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) + return true + } + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && 
check.Type.Size() == 1 && is16BitInt(t) + // result: (CMOVWNE y x (CMPBconst [0] check)) + for { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 1 && is16BitInt(t)) { break } - v.reset(OpAMD64ORLlock) - v.AddArg3(ptr, val, mem) + v.reset(OpAMD64CMOVWNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } return false } -func rewriteValueAMD64_OpSelectN(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem))))) - // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call) - // result: (Move [sc.Val64()] dst src mem) +func rewriteValueAMD64_OpConst16(v *Value) bool { + // match: (Const16 [c]) + // result: (MOVLconst [int32(c)]) for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } - call := v_0 - if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 { - break - } - sym := auxToCall(call.Aux) - s1 := call.Args[0] - if s1.Op != OpAMD64MOVQstoreconst { - break - } - sc := auxIntToValAndOff(s1.AuxInt) - _ = s1.Args[1] - s2 := s1.Args[1] - if s2.Op != OpAMD64MOVQstore { - break - } - _ = s2.Args[2] - src := s2.Args[1] - s3 := s2.Args[2] - if s3.Op != OpAMD64MOVQstore { - break - } - mem := s3.Args[2] - dst := s3.Args[1] - if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) { - break - } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(sc.Val64()) - v.AddArg3(dst, src, mem) + c := auxIntToInt16(v.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(int32(c)) return true } - // match: (SelectN [0] call:(CALLstatic 
{sym} dst src (MOVQconst [sz]) mem)) - // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) - // result: (Move [sz] dst src mem) +} +func rewriteValueAMD64_OpConst8(v *Value) bool { + // match: (Const8 [c]) + // result: (MOVLconst [int32(c)]) for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } - call := v_0 - if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 { - break - } - sym := auxToCall(call.Aux) - mem := call.Args[3] - dst := call.Args[0] - src := call.Args[1] - call_2 := call.Args[2] - if call_2.Op != OpAMD64MOVQconst { - break - } - sz := auxIntToInt64(call_2.AuxInt) - if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) { - break - } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(sz) - v.AddArg3(dst, src, mem) + c := auxIntToInt8(v.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(int32(c)) return true } - return false } -func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiFloat32x16 x y) - // result: (VINSERTF64X4512 [1] x y) +func rewriteValueAMD64_OpConstBool(v *Value) bool { + // match: (ConstBool [c]) + // result: (MOVLconst [b2i32(c)]) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + c := auxIntToBool(v.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(b2i32(c)) return true } } -func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiFloat32x8 x y) - // result: (VINSERTF128256 [1] x y) +func rewriteValueAMD64_OpConstNil(v *Value) bool { + // match: (ConstNil ) + // result: (MOVQconst [0]) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(0) return true } } -func 
rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz16(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiFloat64x4 x y) - // result: (VINSERTF128256 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz16 x) + // result: (BSFL (ORLconst [1<<16] x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + v.reset(OpAMD64BSFL) + v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1 << 16) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiFloat64x8 x y) - // result: (VINSERTF64X4512 [1] x y) + // match: (Ctz16NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiInt16x16 x y) - // result: (VINSERTI128256 [1] x y) + // match: (Ctz16NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSFL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSFL) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz32(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiInt16x32 x y) - // result: (VINSERTI64X4512 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz32 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - 
v.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiInt32x16 x y) - // result: (VINSERTI64X4512 [1] x y) + // match: (Ctz32 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (Select0 (BSFQ (BTSQconst [32] x))) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64) + v1.AuxInt = int8ToAuxInt(32) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiInt32x8 x y) - // result: (VINSERTI128256 [1] x y) + // match: (Ctz32NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiInt64x4 x y) - // result: (VINSERTI128256 [1] x y) + // match: (Ctz32NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSFL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSFL) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz64(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiInt64x8 x y) - // result: (VINSERTI64X4512 [1] x y) + b := 
v.Block + typ := &b.Func.Config.Types + // match: (Ctz64 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTQ x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTQ) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiInt8x32 x y) - // result: (VINSERTI128256 [1] x y) + // match: (Ctz64 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64CMOVQEQ) + v0 := b.NewValue0(v.Pos, OpSelect0, t) + v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1.AddArg(x) + v0.AddArg(v1) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) + v2.AuxInt = int64ToAuxInt(64) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3.AddArg(v1) + v.AddArg3(v0, v2, v3) return true } + return false } -func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiInt8x64 x y) - // result: (VINSERTI64X4512 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz64NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTQ x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTQ) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint16x16 x y) - // result: (VINSERTI128256 [1] x y) + // match: (Ctz64NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (Select0 (BSFQ x)) for { x 
:= v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v0.AddArg(x) + v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz8(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiUint16x32 x y) - // result: (VINSERTI64X4512 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz8 x) + // result: (BSFL (ORLconst [1<<8 ] x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + v.reset(OpAMD64BSFL) + v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1 << 8) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiUint32x16 x y) - // result: (VINSERTI64X4512 [1] x y) + // match: (Ctz8NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint32x8 x y) - // result: (VINSERTI128256 [1] x y) + // match: (Ctz8NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSFL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSFL) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt16toMask16x16(v 
*Value) bool { v_0 := v.Args[0] - // match: (SetHiUint64x4 x y) - // result: (VINSERTI128256 [1] x y) + b := v.Block + // match: (Cvt16toMask16x16 x) + // result: (VPMOVMToVec16x16 (KMOVWk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec16x16) + v.Type = types.TypeVec256 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt16toMask32x16(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiUint64x8 x y) - // result: (VINSERTI64X4512 [1] x y) + b := v.Block + // match: (Cvt16toMask32x16 x) + // result: (VPMOVMToVec32x16 (KMOVWk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec32x16) + v.Type = types.TypeVec512 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiUint8x32 x y) - // result: (VINSERTI128256 [1] x y) + b := v.Block + // match: (Cvt16toMask8x16 x) + // result: (VPMOVMToVec8x16 (KMOVWk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec8x16) + v.Type = types.TypeVec128 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt32Fto32(v *Value) bool { v_0 := v.Args[0] - // match: (SetHiUint8x64 x y) - // result: (VINSERTI64X4512 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Cvt32Fto32 x) + // cond: base.ConvertHash.MatchPos(v.Pos, nil) + // result: 
(XORL y (SARLconst [31] (ANDL y:(CVTTSS2SL x) (NOTL (MOVLf2i x))))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) + if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64XORL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64SARLconst, t) + v0.AuxInt = int8ToAuxInt(31) + v1 := b.NewValue0(v.Pos, OpAMD64ANDL, t) + y := b.NewValue0(v.Pos, OpAMD64CVTTSS2SL, t) + y.AddArg(x) + v3 := b.NewValue0(v.Pos, OpAMD64NOTL, typ.Int32) + v4 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32) + v4.AddArg(x) + v3.AddArg(v4) + v1.AddArg2(y, v3) + v0.AddArg(v1) + v.AddArg2(y, v0) return true } -} -func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoFloat32x16 x y) - // result: (VINSERTF64X4512 [0] x y) + // match: (Cvt32Fto32 x) + // cond: !base.ConvertHash.MatchPos(v.Pos, nil) + // result: (CVTTSS2SL x) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64CVTTSS2SL) + v.Type = t + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt32Fto64(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoFloat32x8 x y) - // result: (VINSERTF128256 [0] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Cvt32Fto64 x) + // cond: base.ConvertHash.MatchPos(v.Pos, nil) + // result: (XORQ y (SARQconst [63] (ANDQ y:(CVTTSS2SQ x) (NOTQ (MOVQf2i (CVTSS2SD x))) ))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64XORQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64SARQconst, t) + v0.AuxInt = int8ToAuxInt(63) + v1 := b.NewValue0(v.Pos, OpAMD64ANDQ, t) + y := 
b.NewValue0(v.Pos, OpAMD64CVTTSS2SQ, t) + y.AddArg(x) + v3 := b.NewValue0(v.Pos, OpAMD64NOTQ, typ.Int64) + v4 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64) + v5 := b.NewValue0(v.Pos, OpAMD64CVTSS2SD, typ.Float64) + v5.AddArg(x) + v4.AddArg(v5) + v3.AddArg(v4) + v1.AddArg2(y, v3) + v0.AddArg(v1) + v.AddArg2(y, v0) return true } -} -func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoFloat64x4 x y) - // result: (VINSERTF128256 [0] x y) + // match: (Cvt32Fto64 x) + // cond: !base.ConvertHash.MatchPos(v.Pos, nil) + // result: (CVTTSS2SQ x) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64CVTTSS2SQ) + v.Type = t + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt32toMask16x32(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoFloat64x8 x y) - // result: (VINSERTF64X4512 [0] x y) + b := v.Block + // match: (Cvt32toMask16x32 x) + // result: (VPMOVMToVec16x32 (KMOVDk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec16x32) + v.Type = types.TypeVec512 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt32toMask8x32(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt16x16 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (Cvt32toMask8x32 x) + // result: (VPMOVMToVec8x32 (KMOVDk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec8x32) + v.Type = types.TypeVec256 + v0 := b.NewValue0(v.Pos, 
OpAMD64KMOVDk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt64Fto32(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt16x32 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (Cvt64Fto32 x) + // cond: base.ConvertHash.MatchPos(v.Pos, nil) + // result: (XORL y (SARLconst [31] (ANDL y:(CVTTSD2SL x) (NOTL (MOVLf2i (CVTSD2SS x)))))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64XORL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64SARLconst, t) + v0.AuxInt = int8ToAuxInt(31) + v1 := b.NewValue0(v.Pos, OpAMD64ANDL, t) + y := b.NewValue0(v.Pos, OpAMD64CVTTSD2SL, t) + y.AddArg(x) + v3 := b.NewValue0(v.Pos, OpAMD64NOTL, typ.Int32) + v4 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpAMD64CVTSD2SS, typ.Float32) + v5.AddArg(x) + v4.AddArg(v5) + v3.AddArg(v4) + v1.AddArg2(y, v3) + v0.AddArg(v1) + v.AddArg2(y, v0) return true } -} -func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoInt32x16 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (Cvt64Fto32 x) + // cond: !base.ConvertHash.MatchPos(v.Pos, nil) + // result: (CVTTSD2SL x) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64CVTTSD2SL) + v.Type = t + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt64Fto64(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt32x8 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: 
(Cvt64Fto64 x) + // cond: base.ConvertHash.MatchPos(v.Pos, nil) + // result: (XORQ y (SARQconst [63] (ANDQ y:(CVTTSD2SQ x) (NOTQ (MOVQf2i x))))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + if !(base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64XORQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64SARQconst, t) + v0.AuxInt = int8ToAuxInt(63) + v1 := b.NewValue0(v.Pos, OpAMD64ANDQ, t) + y := b.NewValue0(v.Pos, OpAMD64CVTTSD2SQ, t) + y.AddArg(x) + v3 := b.NewValue0(v.Pos, OpAMD64NOTQ, typ.Int64) + v4 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64) + v4.AddArg(x) + v3.AddArg(v4) + v1.AddArg2(y, v3) + v0.AddArg(v1) + v.AddArg2(y, v0) + return true + } + // match: (Cvt64Fto64 x) + // cond: !base.ConvertHash.MatchPos(v.Pos, nil) + // result: (CVTTSD2SQ x) + for { + t := v.Type + x := v_0 + if !(!base.ConvertHash.MatchPos(v.Pos, nil)) { + break + } + v.reset(OpAMD64CVTTSD2SQ) + v.Type = t + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt64toMask8x64(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt64x4 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (Cvt64toMask8x64 x) + // result: (VPMOVMToVec8x64 (KMOVQk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec8x64) + v.Type = types.TypeVec512 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVQk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt8toMask16x8(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt64x8 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + // match: (Cvt8toMask16x8 x) + // result: (VPMOVMToVec16x8 (KMOVBk x)) for { + t := v.Type x := v_0 - y := v_1 - 
v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec16x8) + v.Type = types.TypeVec128 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt8toMask32x4(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt8x32 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (Cvt8toMask32x4 x) + // result: (VPMOVMToVec32x4 (KMOVBk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec32x4) + v.Type = types.TypeVec128 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt8toMask32x8(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoInt8x64 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + // match: (Cvt8toMask32x8 x) + // result: (VPMOVMToVec32x8 (KMOVBk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec32x8) + v.Type = types.TypeVec256 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt8toMask64x2(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint16x16 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (Cvt8toMask64x2 x) + // result: (VPMOVMToVec64x2 (KMOVBk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec64x2) + v.Type = types.TypeVec128 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt8toMask64x4(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint16x32 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + // match: (Cvt8toMask64x4 x) + // result: (VPMOVMToVec64x4 (KMOVBk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec64x4) + v.Type = types.TypeVec256 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint32x16 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + // match: (Cvt8toMask64x8 x) + // result: (VPMOVMToVec64x8 (KMOVBk x)) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VPMOVMToVec64x8) + v.Type = types.TypeVec512 + v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint32x8 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (CvtMask16x16to16 x) + // result: (KMOVWi (VPMOVVec16x16ToM x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64KMOVWi) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint64x4 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (CvtMask16x32to32 x) 
+ // result: (KMOVDi (VPMOVVec16x32ToM x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64KMOVDi) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint64x8 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + // match: (CvtMask16x8to8 x) + // result: (KMOVBi (VPMOVVec16x8ToM x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64KMOVBi) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint8x32 x y) - // result: (VINSERTI128256 [0] x y) + b := v.Block + // match: (CvtMask32x16to16 x) + // result: (KMOVWi (VPMOVVec32x16ToM x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64KMOVWi) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool { v_0 := v.Args[0] - // match: (SetLoUint8x64 x y) - // result: (VINSERTI64X4512 [0] x y) + b := v.Block + // match: (CvtMask64x8to8 x) + // result: (KMOVBi (VPMOVVec64x8ToM x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64KMOVBi) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSlicemask(v 
*Value) bool { +func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (Slicemask x) - // result: (SARQconst (NEGQ x) [63]) + // match: (CvtMask8x64to64 x) + // result: (KMOVQi (VPMOVVec8x64ToM x)) for { - t := v.Type x := v_0 - v.reset(OpAMD64SARQconst) - v.AuxInt = int8ToAuxInt(63) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v.reset(OpAMD64KMOVQi) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSpectreIndex(v *Value) bool { +func rewriteValueAMD64_OpDiv16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SpectreIndex x y) - // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y)) + // match: (Div16 [a] x y) + // result: (Select0 (DIVW [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64CMOVQCC) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v1.AddArg2(x, y) - v.AddArg3(x, v0, v1) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool { +func rewriteValueAMD64_OpDiv16u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (SpectreSliceIndex x y) - // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y)) + // match: (Div16u x y) + // result: (Select0 (DIVWU x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64CMOVQHI) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v1.AddArg2(x, y) - v.AddArg3(x, v0, v1) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v0.AddArg2(x, y) + v.AddArg(v0) return 
true } } -func rewriteValueAMD64_OpStore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpDiv32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Store {t} ptr val mem) - // cond: t.Size() == 8 && t.IsFloat() - // result: (MOVSDstore ptr val mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (Div32 [a] x y) + // result: (Select0 (DIVL [a] x y)) for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 8 && t.IsFloat()) { - break - } - v.reset(OpAMD64MOVSDstore) - v.AddArg3(ptr, val, mem) + a := auxIntToBool(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 4 && t.IsFloat() - // result: (MOVSSstore ptr val mem) +} +func rewriteValueAMD64_OpDiv32u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Div32u x y) + // result: (Select0 (DIVLU x y)) for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 4 && t.IsFloat()) { - break - } - v.reset(OpAMD64MOVSSstore) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 8 && !t.IsFloat() - // result: (MOVQstore ptr val mem) +} +func rewriteValueAMD64_OpDiv64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Div64 [a] x y) + // result: (Select0 (DIVQ [a] x y)) for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 8 && !t.IsFloat()) { - break - } - v.reset(OpAMD64MOVQstore) - v.AddArg3(ptr, val, mem) + a := auxIntToBool(v.AuxInt) + x := v_0 + y := v_1 + 
v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 4 && !t.IsFloat() - // result: (MOVLstore ptr val mem) +} +func rewriteValueAMD64_OpDiv64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Div64u x y) + // result: (Select0 (DIVQU x y)) for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 4 && !t.IsFloat()) { - break - } - v.reset(OpAMD64MOVLstore) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 2 - // result: (MOVWstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 2) { - break - } - v.reset(OpAMD64MOVWstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 1 - // result: (MOVBstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 1) { - break - } - v.reset(OpAMD64MOVBstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 16 - // result: (VMOVDQUstore128 ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 16) { - break - } - v.reset(OpAMD64VMOVDQUstore128) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 32 - // result: (VMOVDQUstore256 ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 32) { - break - } - v.reset(OpAMD64VMOVDQUstore256) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr 
val mem) - // cond: t.Size() == 64 - // result: (VMOVDQUstore512 ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VMOVDQUstore512) - v.AddArg3(ptr, val, mem) - return true - } - return false } -func rewriteValueAMD64_OpStoreMasked16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpDiv8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (StoreMasked16 {t} ptr mask val mem) - // cond: t.Size() == 64 - // result: (VPMASK16store512 ptr (VPMOVVec16x32ToM mask) val mem) + typ := &b.Func.Config.Types + // match: (Div8 x y) + // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y))) for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VPMASK16store512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(ptr, v0, val, mem) + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) return true } - return false } -func rewriteValueAMD64_OpStoreMasked32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpDiv8u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (StoreMasked32 {t} ptr mask val mem) - // cond: t.Size() == 16 - // result: (VPMASK32store128 ptr mask val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 16) { - break - } - v.reset(OpAMD64VPMASK32store128) - v.AddArg4(ptr, mask, val, mem) - return true - } - // match: (StoreMasked32 {t} ptr mask val mem) - // cond: t.Size() == 32 - // result: (VPMASK32store256 ptr mask val mem) - for { - t := 
auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 32) { - break - } - v.reset(OpAMD64VPMASK32store256) - v.AddArg4(ptr, mask, val, mem) - return true - } - // match: (StoreMasked32 {t} ptr mask val mem) - // cond: t.Size() == 64 - // result: (VPMASK32store512 ptr (VPMOVVec32x16ToM mask) val mem) + typ := &b.Func.Config.Types + // match: (Div8u x y) + // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VPMASK32store512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(ptr, v0, val, mem) + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) return true } - return false } -func rewriteValueAMD64_OpStoreMasked64(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpEq16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (StoreMasked64 {t} ptr mask val mem) - // cond: t.Size() == 16 - // result: (VPMASK64store128 ptr mask val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 16) { - break - } - v.reset(OpAMD64VPMASK64store128) - v.AddArg4(ptr, mask, val, mem) - return true - } - // match: (StoreMasked64 {t} ptr mask val mem) - // cond: t.Size() == 32 - // result: (VPMASK64store256 ptr mask val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 32) { - break - } - v.reset(OpAMD64VPMASK64store256) - v.AddArg4(ptr, mask, val, mem) - return true - } - // match: (StoreMasked64 {t} ptr mask val mem) - // cond: t.Size() == 64 - // result: 
(VPMASK64store512 ptr (VPMOVVec64x8ToM mask) val mem) + // match: (Eq16 x y) + // result: (SETEQ (CMPW x y)) for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VPMASK64store512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(ptr, v0, val, mem) + x := v_0 + y := v_1 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - return false } -func rewriteValueAMD64_OpStoreMasked8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpEq32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (StoreMasked8 {t} ptr mask val mem) - // cond: t.Size() == 64 - // result: (VPMASK8store512 ptr (VPMOVVec8x64ToM mask) val mem) + // match: (Eq32 x y) + // result: (SETEQ (CMPL x y)) for { - t := auxToType(v.Aux) - ptr := v_0 - mask := v_1 - val := v_2 - mem := v_3 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VPMASK8store512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(ptr, v0, val, mem) + x := v_0 + y := v_1 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - return false } -func rewriteValueAMD64_OpTrunc(v *Value) bool { +func rewriteValueAMD64_OpEq32F(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Trunc x) - // result: (ROUNDSD [3] x) + b := v.Block + // match: (Eq32F x y) + // result: (SETEQF (UCOMISS x y)) for { x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64SETEQF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpEq64(v *Value) bool { + v_1 := v.Args[1] v_0 := 
v.Args[0] - // match: (TruncFloat32x4 x) - // result: (VROUNDPS128 [3] x) + b := v.Block + // match: (Eq64 x y) + // result: (SETEQ (CMPQ x y)) for { x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpEq64F(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat32x8 x) - // result: (VROUNDPS256 [3] x) + b := v.Block + // match: (Eq64F x y) + // result: (SETEQF (UCOMISD x y)) for { x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64SETEQF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpEq8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat64x2 x) - // result: (VROUNDPD128 [3] x) + b := v.Block + // match: (Eq8 x y) + // result: (SETEQ (CMPB x y)) for { x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpEqB(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat64x4 x) - // result: (VROUNDPD256 [3] x) + b := v.Block + // match: (EqB x y) + // result: (SETEQ (CMPB x y)) for { x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = uint8ToAuxInt(3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncScaledFloat32x16(v *Value) bool { +func 
rewriteValueAMD64_OpEqPtr(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+3] x) + b := v.Block + // match: (EqPtr x y) + // result: (SETEQ (CMPQ x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncScaledFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(0) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncScaledFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+3] x) + // match: (EqualFloat32x4 x y) + // result: (VCMPPS128 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpTruncScaledFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+3] x) + // match: (EqualFloat32x8 x y) + // result: 
(VCMPPS256 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpTruncScaledFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+3] x) + // match: (EqualFloat64x2 x y) + // result: (VCMPPD128 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+3] x) + // match: (EqualFloat64x4 x y) + // result: (VCMPPD256 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledResidueFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(0) + v0.AddArg2(x, y) + v.AddArg(v0) return true } 
} -func rewriteValueAMD64_OpTruncScaledResidueFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledResidueFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncScaledResidueFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledResidueFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncScaledResidueFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledResidueFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledResidueFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncScaledResidueFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y)) for { - a := auxIntToUint8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = uint8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpZero(v *Value) bool { +func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Zero [0] _ mem) - // result: mem + // match: (EqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y)) for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } - mem := v_1 - v.copyOf(mem) + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Zero [1] destptr mem) - // result: (MOVBstoreconst [makeValAndOff(0,0)] destptr mem) +} +func rewriteValueAMD64_OpEqualUint64x8(v 
*Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y)) for { - if auxIntToInt64(v.AuxInt) != 1 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v.AddArg2(destptr, mem) + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Zero [2] destptr mem) - // result: (MOVWstoreconst [makeValAndOff(0,0)] destptr mem) +} +func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y)) for { - if auxIntToInt64(v.AuxInt) != 2 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v.AddArg2(destptr, mem) + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) return true } - // match: (Zero [4] destptr mem) - // result: (MOVLstoreconst [makeValAndOff(0,0)] destptr mem) +} +func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandFloat32x16 x mask) + // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 4 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v.AddArg2(destptr, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VEXPANDPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [8] destptr mem) - // result: (MOVQstoreconst [makeValAndOff(0,0)] destptr mem) 
+} +func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandFloat32x4 x mask) + // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 8 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v.AddArg2(destptr, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VEXPANDPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [3] destptr mem) - // result: (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVWstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandFloat32x8 x mask) + // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 3 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 2)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VEXPANDPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [5] destptr mem) - // result: (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandFloat64x2 x mask) + // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 5 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 4)) - v0 := 
b.NewValue0(v.Pos, OpAMD64MOVLstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VEXPANDPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [6] destptr mem) - // result: (MOVWstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandFloat64x4 x mask) + // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 6 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 4)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VEXPANDPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [7] destptr mem) - // result: (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandFloat64x8 x mask) + // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 7 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 3)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VEXPANDPDMasked512) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [9] destptr mem) - // result: (MOVBstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt16x16 x mask) + // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 9 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVBstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [10] destptr mem) - // result: (MOVWstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt16x32 x mask) + // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 10 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [11] destptr mem) - // result: (MOVLstoreconst [makeValAndOff(0,7)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func 
rewriteValueAMD64_OpExpandInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt16x8 x mask) + // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 11 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 7)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [12] destptr mem) - // result: (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt32x16 x mask) + // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 12 { - break - } - destptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [s] destptr mem) - // cond: s > 12 && s < 16 - // result: (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) +} +func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt32x4 x mask) + // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM mask)) for { - s := 
auxIntToInt64(v.AuxInt) - destptr := v_0 - mem := v_1 - if !(s > 12 && s < 16) { - break - } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, int32(s-8))) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) - v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v0.AddArg2(destptr, mem) - v.AddArg2(destptr, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [s] destptr mem) - // cond: s >= 16 && s < 192 - // result: (LoweredZero [s] destptr mem) +} +func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt32x8 x mask) + // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - destptr := v_0 - mem := v_1 - if !(s >= 16 && s < 192) { - break - } - v.reset(OpAMD64LoweredZero) - v.AuxInt = int64ToAuxInt(s) - v.AddArg2(destptr, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Zero [s] destptr mem) - // cond: s >= 192 && s <= repZeroThreshold - // result: (LoweredZeroLoop [s] destptr mem) +} +func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt64x2 x mask) + // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - destptr := v_0 - mem := v_1 - if !(s >= 192 && s <= repZeroThreshold) { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt64x4 x 
mask) + // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt64x8 x mask) + // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt8x16 x mask) + // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt8x32 x mask) + // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandInt8x64 x mask) + // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint16x16 x 
mask) + // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint16x32 x mask) + // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint16x8 x mask) + // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint32x16 x mask) + // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint32x4 x mask) + // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(ExpandUint32x8 x mask) + // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint64x2 x mask) + // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint64x4 x mask) + // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint64x8 x mask) + // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint8x16 x mask) + // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // 
match: (ExpandUint8x32 x mask) + // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ExpandUint8x64 x mask) + // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VPEXPANDBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpFMA(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (FMA x y z) + // result: (VFMADD231SD z x y) + for { + x := v_0 + y := v_1 + z := v_2 + v.reset(OpAMD64VFMADD231SD) + v.AddArg3(z, x, y) + return true + } +} +func rewriteValueAMD64_OpFloor(v *Value) bool { + v_0 := v.Args[0] + // match: (Floor x) + // result: (ROUNDSD [1] x) + for { + x := v_0 + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat32x4 x) + // result: (VROUNDPS128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat32x8 x) + // result: (VROUNDPS256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat64x2 x) + // result: (VROUNDPD128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat64x4(v 
*Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat64x4 x) + // result: (VROUNDPD256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + 
v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledResidueFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledResidueFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledResidueFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledResidueFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledResidueFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorScaledResidueFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+1] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = 
uint8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetG(v *Value) bool { + v_0 := v.Args[0] + // match: (GetG mem) + // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal + // result: (LoweredGetG mem) + for { + mem := v_0 + if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) { + break + } + v.reset(OpAMD64LoweredGetG) + v.AddArg(mem) + return true + } + return false +} +func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat32x16 x) + // result: (VEXTRACTF64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat32x8 x) + // result: (VEXTRACTF128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat64x4 x) + // result: (VEXTRACTF128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat64x8 x) + // result: (VEXTRACTF64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt16x16 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt16x32 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt32x16 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt32x8 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt64x4 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt64x8 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt8x32 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt8x64 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint16x16 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint16x32 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + 
v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint32x16 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint32x8 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint64x4 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint64x8 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint8x32 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint8x64 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat32x16 x) + // result: (VEXTRACTF64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool { + 
v_0 := v.Args[0] + // match: (GetLoFloat32x8 x) + // result: (VEXTRACTF128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat64x4 x) + // result: (VEXTRACTF128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat64x8 x) + // result: (VEXTRACTF64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt16x16 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt16x32 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt32x16 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt32x8 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt64x4 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = 
uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt64x8 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt8x32 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt8x64 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint16x16 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint16x32 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint32x16 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint32x8 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint64x4 x) + // 
result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint64x8 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint8x32 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint8x64 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterEqualFloat32x4 x y) + // result: (VCMPPS128 [13] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(13) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterEqualFloat32x8 x y) + // result: (VCMPPS256 [13] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(13) + v.AddArg2(x, y) + return true + } +} 
+func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterEqualFloat64x2 x y) + // result: (VCMPPD128 [13] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(13) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterEqualFloat64x4 x y) + // result: (VCMPPD256 [13] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(13) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualInt64x8(v 
*Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) + for { + x := v_0 + y 
:= v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterEqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterFloat32x4 x y) + // result: (VCMPPS128 [14] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(14) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterFloat32x8 x y) + // result: (VCMPPS256 [14] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(14) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterFloat64x2 x y) + // result: (VCMPPD128 [14] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(14) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterFloat64x4(v 
*Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GreaterFloat64x4 x y) + // result: (VCMPPD256 [14] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(14) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + typ := &b.Func.Config.Types + // match: (GreaterInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(14) + 
v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (HasCPUFeature {s}) + // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s}))) + for { + s := auxToSym(v.Aux) + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64) + v1.Aux = symToAux(s) + v0.AddArg(v1) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsInBounds(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (IsInBounds idx len) + // result: (SETB (CMPQ idx len)) + for { + idx := v_0 + len := v_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(idx, len) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (IsNaNFloat32x16 x) + // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x)) + for { + x := v_0 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(3) + v0.AddArg2(x, x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (IsNaNFloat32x4 x) + // result: (VCMPPS128 [3] x x) + for { + x := v_0 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, x) + return true + } +} +func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (IsNaNFloat32x8 x) + // result: (VCMPPS256 [3] x x) + for { + x := v_0 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, x) + return true + } +} +func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (IsNaNFloat64x2 x) + // result: (VCMPPD128 [3] x x) + for { + x := v_0 + v.reset(OpAMD64VCMPPD128) + 
v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, x) + return true + } +} +func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (IsNaNFloat64x4 x) + // result: (VCMPPD256 [3] x x) + for { + x := v_0 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg2(x, x) + return true + } +} +func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (IsNaNFloat64x8 x) + // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x)) + for { + x := v_0 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(3) + v0.AddArg2(x, x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsNonNil(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (IsNonNil p) + // result: (SETNE (TESTQ p p)) + for { + p := v_0 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags) + v0.AddArg2(p, p) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (IsSliceInBounds idx len) + // result: (SETBE (CMPQ idx len)) + for { + idx := v_0 + len := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(idx, len) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsZeroVec(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (IsZeroVec x) + // result: (SETEQ (VPTEST x x)) + for { + x := v_0 + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(x, x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq16 x y) + // result: (SETLE (CMPW x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return 
true + } +} +func rewriteValueAMD64_OpLeq16U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq16U x y) + // result: (SETBE (CMPW x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq32 x y) + // result: (SETLE (CMPL x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq32F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq32F x y) + // result: (SETGEF (UCOMISS y x)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETGEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq32U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq32U x y) + // result: (SETBE (CMPL x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq64 x y) + // result: (SETLE (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq64F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq64F x y) + // result: (SETGEF (UCOMISD y x)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETGEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } +} +func 
rewriteValueAMD64_OpLeq64U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq64U x y) + // result: (SETBE (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq8 x y) + // result: (SETLE (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq8U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq8U x y) + // result: (SETBE (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less16 x y) + // result: (SETL (CMPW x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess16U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less16U x y) + // result: (SETB (CMPW x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less32 x y) + // result: (SETL (CMPL x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess32F(v *Value) bool { + v_1 := 
v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less32F x y) + // result: (SETGF (UCOMISS y x)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETGF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess32U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less32U x y) + // result: (SETB (CMPL x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less64 x y) + // result: (SETL (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess64F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less64F x y) + // result: (SETGF (UCOMISD y x)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETGF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess64U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less64U x y) + // result: (SETB (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Less8 x y) + // result: (SETL (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLess8U(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b 
:= v.Block + // match: (Less8U x y) + // result: (SETB (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessEqualFloat32x4 x y) + // result: (VCMPPS128 [2] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(2) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessEqualFloat32x8 x y) + // result: (VCMPPS256 [2] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(2) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessEqualFloat64x2 x y) + // result: (VCMPPD128 [2] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(2) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessEqualFloat64x4 x y) + // result: (VCMPPD256 [2] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(2) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := 
&b.Func.Config.Types + // match: (LessEqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = 
uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: 
(LessFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessFloat32x4 x y) + // result: (VCMPPS128 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessFloat32x8 x y) + // result: (VCMPPS256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessFloat64x2 x y) + // result: (VCMPPD128 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessFloat64x4 x y) + // result: (VCMPPD256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // 
match: (LessInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func 
rewriteValueAMD64_OpLessUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(1) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLoad(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Load ptr mem) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (MOVQload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64MOVQload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is32BitInt(t) + // result: (MOVLload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64MOVLload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is16BitInt(t) + // result: (MOVWload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + 
if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64MOVWload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: (t.IsBoolean() || is8BitInt(t)) + // result: (MOVBload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.IsBoolean() || is8BitInt(t)) { + break + } + v.reset(OpAMD64MOVBload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is32BitFloat(t) + // result: (MOVSSload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is32BitFloat(t)) { + break + } + v.reset(OpAMD64MOVSSload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is64BitFloat(t) + // result: (MOVSDload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is64BitFloat(t)) { + break + } + v.reset(OpAMD64MOVSDload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: t.Size() == 16 + // result: (VMOVDQUload128 ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VMOVDQUload128) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: t.Size() == 32 + // result: (VMOVDQUload256 ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VMOVDQUload256) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: t.Size() == 64 + // result: (VMOVDQUload512 ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VMOVDQUload512) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpLoadMasked16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (LoadMasked16 ptr mask mem) + // cond: t.Size() == 64 + // result: (VPMASK16load512 ptr (VPMOVVec16x32ToM mask) mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 64) { + break + } + 
v.reset(OpAMD64VPMASK16load512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(ptr, v0, mem) + return true + } + return false +} +func rewriteValueAMD64_OpLoadMasked32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (LoadMasked32 ptr mask mem) + // cond: t.Size() == 16 + // result: (VPMASK32load128 ptr mask mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VPMASK32load128) + v.AddArg3(ptr, mask, mem) + return true + } + // match: (LoadMasked32 ptr mask mem) + // cond: t.Size() == 32 + // result: (VPMASK32load256 ptr mask mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VPMASK32load256) + v.AddArg3(ptr, mask, mem) + return true + } + // match: (LoadMasked32 ptr mask mem) + // cond: t.Size() == 64 + // result: (VPMASK32load512 ptr (VPMOVVec32x16ToM mask) mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK32load512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(ptr, v0, mem) + return true + } + return false +} +func rewriteValueAMD64_OpLoadMasked64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (LoadMasked64 ptr mask mem) + // cond: t.Size() == 16 + // result: (VPMASK64load128 ptr mask mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VPMASK64load128) + v.AddArg3(ptr, mask, mem) + return true + } + // match: (LoadMasked64 ptr mask mem) + // cond: t.Size() == 32 + // result: (VPMASK64load256 ptr mask mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VPMASK64load256) + v.AddArg3(ptr, mask, mem) + return true + 
} + // match: (LoadMasked64 ptr mask mem) + // cond: t.Size() == 64 + // result: (VPMASK64load512 ptr (VPMOVVec64x8ToM mask) mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK64load512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(ptr, v0, mem) + return true + } + return false +} +func rewriteValueAMD64_OpLoadMasked8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (LoadMasked8 ptr mask mem) + // cond: t.Size() == 64 + // result: (VPMASK8load512 ptr (VPMOVVec8x64ToM mask) mem) + for { + t := v.Type + ptr := v_0 + mask := v_1 + mem := v_2 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK8load512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(ptr, v0, mem) + return true + } + return false +} +func rewriteValueAMD64_OpLocalAddr(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LocalAddr {sym} base mem) + // cond: t.Elem().HasPointers() + // result: (LEAQ {sym} (SPanchored base mem)) + for { + t := v.Type + sym := auxToSym(v.Aux) + base := v_0 + mem := v_1 + if !(t.Elem().HasPointers()) { + break + } + v.reset(OpAMD64LEAQ) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr) + v0.AddArg2(base, mem) + v.AddArg(v0) + return true + } + // match: (LocalAddr {sym} base _) + // cond: !t.Elem().HasPointers() + // result: (LEAQ {sym} base) + for { + t := v.Type + sym := auxToSym(v.Aux) + base := v_0 + if !(!t.Elem().HasPointers()) { + break + } + v.reset(OpAMD64LEAQ) + v.Aux = symToAux(sym) + v.AddArg(base) + return true + } + return false +} +func rewriteValueAMD64_OpLsh16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh16x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask 
(CMPWconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh16x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x32 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh16x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh16x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = 
int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh16x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh32x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh32x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func 
rewriteValueAMD64_OpLsh32x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh32x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh32x32 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh32x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh32x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh32x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh32x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + 
v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh32x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh64x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh64x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPWconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh64x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh64x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh64x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPLconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh64x32 x 
y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh64x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh64x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPQconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh64x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh64x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPBconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh64x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(Lsh8x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh8x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh8x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh8x32 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh8x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, 
OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh8x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpLsh8x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh8x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh8x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpMax32F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Max32F x y) + // result: (Neg32F (Min32F (Neg32F x) (Neg32F y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpNeg32F) + v.Type = t + v0 := b.NewValue0(v.Pos, OpMin32F, t) + v1 := b.NewValue0(v.Pos, OpNeg32F, t) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpNeg32F, t) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMax64F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Max64F x y) + // result: (Neg64F (Min64F (Neg64F x) (Neg64F y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpNeg64F) + v.Type = t + v0 := b.NewValue0(v.Pos, 
OpMin64F, t) + v1 := b.NewValue0(v.Pos, OpNeg64F, t) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpNeg64F, t) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMin32F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Min32F x y) + // result: (POR (MINSS (MINSS x y) x) (MINSS x y)) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpAMD64POR) + v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t) + v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t) + v1.AddArg2(x, y) + v0.AddArg2(v1, x) + v.AddArg2(v0, v1) + return true + } +} +func rewriteValueAMD64_OpMin64F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Min64F x y) + // result: (POR (MINSD (MINSD x y) x) (MINSD x y)) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpAMD64POR) + v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t) + v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t) + v1.AddArg2(x, y) + v0.AddArg2(v1, x) + v.AddArg2(v0, v1) + return true + } +} +func rewriteValueAMD64_OpMod16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod16 [a] x y) + // result: (Select1 (DIVW [a] x y)) + for { + a := auxIntToBool(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMod16u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod16u x y) + // result: (Select1 (DIVWU x y)) + for { + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMod32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod32 [a] x y) + // 
result: (Select1 (DIVL [a] x y)) + for { + a := auxIntToBool(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMod32u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod32u x y) + // result: (Select1 (DIVLU x y)) + for { + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMod64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod64 [a] x y) + // result: (Select1 (DIVQ [a] x y)) + for { + a := auxIntToBool(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMod64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod64u x y) + // result: (Select1 (DIVQU x y)) + for { + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMod8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod8 x y) + // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y))) + for { + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) 
+ return true + } +} +func rewriteValueAMD64_OpMod8u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod8u x y) + // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) + for { + x := v_0 + y := v_1 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpMove(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Move [0] _ _ mem) + // result: mem + for { + if auxIntToInt64(v.AuxInt) != 0 { + break + } + mem := v_2 + v.copyOf(mem) + return true + } + // match: (Move [1] dst src mem) + // result: (MOVBstore dst (MOVBload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 1 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVBstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [2] dst src mem) + // result: (MOVWstore dst (MOVWload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 2 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVWstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [4] dst src mem) + // result: (MOVLstore dst (MOVLload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [8] dst src mem) + // result: (MOVQstore dst (MOVQload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 8 
{ + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVQstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [16] dst src mem) + // result: (MOVOstore dst (MOVOload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 16 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVOstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [3] dst src mem) + // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 3 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AuxInt = int32ToAuxInt(2) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [5] dst src mem) + // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 5 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AuxInt = int32ToAuxInt(4) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [6] dst src mem) + // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 6 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVWstore) 
+ v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v0.AuxInt = int32ToAuxInt(4) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [7] dst src mem) + // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 7 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(3) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [9] dst src mem) + // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 9 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [10] dst src mem) + // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 10 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, 
types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [11] dst src mem) + // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 11 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(7) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(7) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [12] dst src mem) + // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 12 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [s] dst src mem) + // cond: s >= 13 && s <= 15 + // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) + for { + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s >= 13 && s <= 15) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = int32ToAuxInt(int32(s - 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(int32(s - 8)) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + 
v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) + return true + } + // match: (Move [s] dst src mem) + // cond: s > 16 && s < 192 && logLargeCopy(v, s) + // result: (LoweredMove [s] dst src mem) + for { + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > 16 && s < 192 && logLargeCopy(v, s)) { + break + } + v.reset(OpAMD64LoweredMove) + v.AuxInt = int64ToAuxInt(s) + v.AddArg3(dst, src, mem) + return true + } + // match: (Move [s] dst src mem) + // cond: s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s) + // result: (LoweredMoveLoop [s] dst src mem) + for { + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) { + break + } + v.reset(OpAMD64LoweredMoveLoop) + v.AuxInt = int64ToAuxInt(s) + v.AddArg3(dst, src, mem) + return true + } + // match: (Move [s] dst src mem) + // cond: s > repMoveThreshold && s%8 != 0 + // result: (Move [s-s%8] (OffPtr dst [s%8]) (OffPtr src [s%8]) (MOVQstore dst (MOVQload src mem) mem)) + for { + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > repMoveThreshold && s%8 != 0) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(s - s%8) + v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) + v0.AuxInt = int64ToAuxInt(s % 8) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) + v1.AuxInt = int64ToAuxInt(s % 8) + v1.AddArg(src) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v3.AddArg2(src, mem) + v2.AddArg3(dst, v3, mem) + v.AddArg3(v0, v1, v2) + return true + } + // match: (Move [s] dst src mem) + // cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s) + // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem) + for { + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) { + break + } + v.reset(OpAMD64REPMOVSQ) + v0 := b.NewValue0(v.Pos, 
OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(s / 8) + v.AddArg4(dst, src, v0, mem) + return true + } + return false +} +func rewriteValueAMD64_OpNeg32F(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Neg32F x) + // result: (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) + for { + x := v_0 + v.reset(OpAMD64PXOR) + v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32) + v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1))) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpNeg64F(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Neg64F x) + // result: (PXOR x (MOVSDconst [math.Copysign(0, -1)])) + for { + x := v_0 + v.reset(OpAMD64PXOR) + v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64) + v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1)) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpNeq16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq16 x y) + // result: (SETNE (CMPW x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq32 x y) + // result: (SETNE (CMPL x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq32F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq32F x y) + // result: (SETNEF (UCOMISS x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b 
:= v.Block + // match: (Neq64 x y) + // result: (SETNE (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq64F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq64F x y) + // result: (SETNEF (UCOMISD x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq8 x y) + // result: (SETNE (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeqB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (NeqB x y) + // result: (SETNE (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeqPtr(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (NeqPtr x y) + // result: (SETNE (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNot(v *Value) bool { + v_0 := v.Args[0] + // match: (Not x) + // result: (XORLconst [1] x) + for { + x := v_0 + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) 
+ for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat32x4 x y) + // result: (VCMPPS128 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = uint8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat32x8 x y) + // result: (VCMPPS256 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = uint8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat64x2 x y) + // result: (VCMPPD128 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = uint8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat64x4 x y) + // result: (VCMPPD256 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = uint8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualInt16x32 x y) 
+ // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} 
+func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = uint8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpOffPtr(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (OffPtr [off] ptr) + // cond: is32Bit(off) + // result: (ADDQconst [int32(off)] ptr) + for { + off := auxIntToInt64(v.AuxInt) + ptr := v_0 + if !(is32Bit(off)) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(int32(off)) + v.AddArg(ptr) + return true + } + // match: (OffPtr [off] ptr) + // result: (ADDQ (MOVQconst [off]) ptr) + for { + off := auxIntToInt64(v.AuxInt) + ptr := v_0 + v.reset(OpAMD64ADDQ) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(off) + v.AddArg2(v0, ptr) + return true + } +} 
+func rewriteValueAMD64_OpPopCount16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (PopCount16 x) + // result: (POPCNTL (MOVWQZX x)) + for { + x := v_0 + v.reset(OpAMD64POPCNTL) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) + v0.AddArg(x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpPopCount8(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (PopCount8 x) + // result: (POPCNTL (MOVBQZX x)) + for { + x := v_0 + v.reset(OpAMD64POPCNTL) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) + v0.AddArg(x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpRoundToEven(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEven x) + // result: (ROUNDSD [0] x) + for { + x := v_0 + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenFloat32x4 x) + // result: (VROUNDPS128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenFloat32x8 x) + // result: (VROUNDPS256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenFloat64x2 x) + // result: (VROUNDPD128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenFloat64x4 x) + // result: (VROUNDPD256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledResidueFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledResidueFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledResidueFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledResidueFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledResidueFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RoundToEvenScaledResidueFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+0] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = uint8ToAuxInt(a + 0) + v.AddArg(x) + return 
true + } +} +func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPWconst y [16]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh16Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPLconst y [16]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh16Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPQconst y [16]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) 
{ + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh16Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPBconst y [16]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh16Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [16]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(16) + v3.AddArg(y) + 
v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh16x16 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [16]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh16x32 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [16]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh16x64 x y) + // cond: 
shiftIsBounded(v) + // result: (SARW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh16x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [16]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh16x8 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPWconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh32Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32Ux32(v 
*Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPLconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh32Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPQconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh32Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPBconst y [32]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := 
b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh32Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [32]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh32x16 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [32]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + 
v3.AuxInt = int32ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh32x32 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [32]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh32x64 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh32x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [32]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // 
match: (Rsh32x8 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPWconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh64Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPLconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh64Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + 
b := v.Block + // match: (Rsh64Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPQconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh64Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPBconst y [64]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh64Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [64]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := 
b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh64x16 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [64]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh64x32 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [64]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, 
OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh64x64 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh64x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [64]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh64x8 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPWconst y [8]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh8Ux16 x y) + // cond: shiftIsBounded(v) + // 
result: (SHRB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPLconst y [8]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh8Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPQconst y [8]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh8Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8Ux8 x y) + // cond: 
!shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPBconst y [8]))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Rsh8Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [8]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh8x16 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [8]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, 
OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh8x32 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [8]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh8x64 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpRsh8x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Rsh8x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [8]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := 
b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh8x8 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueAMD64_OpSelect0(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Select0 (Mul64uover x y)) + // result: (Select0 (MULQU x y)) + for { + if v_0.Op != OpMul64uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpSelect0) + v.Type = typ.UInt64 + v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + // match: (Select0 (Mul32uover x y)) + // result: (Select0 (MULLU x y)) + for { + if v_0.Op != OpMul32uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpSelect0) + v.Type = typ.UInt32 + v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + // match: (Select0 (Add64carry x y c)) + // result: (Select0 (ADCQ x y (Select1 (NEGLflags c)))) + for { + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v2.AddArg(c) + v1.AddArg(v2) + v0.AddArg3(x, y, v1) + v.AddArg(v0) + return true + } + // match: (Select0 (Sub64borrow x y c)) + // result: (Select0 (SBBQ x y (Select1 (NEGLflags c)))) + for { + if v_0.Op != OpSub64borrow { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpSelect0) + v0 := 
b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v2.AddArg(c) + v1.AddArg(v2) + v0.AddArg3(x, y, v1) + v.AddArg(v0) + return true + } + // match: (Select0 (AddTupleFirst32 val tuple)) + // result: (ADDL val (Select0 tuple)) + for { + t := v.Type + if v_0.Op != OpAMD64AddTupleFirst32 { + break + } + tuple := v_0.Args[1] + val := v_0.Args[0] + v.reset(OpAMD64ADDL) + v0 := b.NewValue0(v.Pos, OpSelect0, t) + v0.AddArg(tuple) + v.AddArg2(val, v0) + return true + } + // match: (Select0 (AddTupleFirst64 val tuple)) + // result: (ADDQ val (Select0 tuple)) + for { + t := v.Type + if v_0.Op != OpAMD64AddTupleFirst64 { + break + } + tuple := v_0.Args[1] + val := v_0.Args[0] + v.reset(OpAMD64ADDQ) + v0 := b.NewValue0(v.Pos, OpSelect0, t) + v0.AddArg(tuple) + v.AddArg2(val, v0) + return true + } + // match: (Select0 a:(ADDQconstflags [c] x)) + // cond: a.Uses == 1 + // result: (ADDQconst [c] x) + for { + a := v_0 + if a.Op != OpAMD64ADDQconstflags { + break + } + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true + } + // match: (Select0 a:(ADDLconstflags [c] x)) + // cond: a.Uses == 1 + // result: (ADDLconst [c] x) + for { + a := v_0 + if a.Op != OpAMD64ADDLconstflags { + break + } + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64ADDLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpSelect1(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Select1 (Mul64uover x y)) + // result: (SETO (Select1 (MULQU x y))) + for { + if v_0.Op != OpMul64uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpAMD64SETO) + v0 := 
b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1.AddArg2(x, y) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Select1 (Mul32uover x y)) + // result: (SETO (Select1 (MULLU x y))) + for { + if v_0.Op != OpMul32uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpAMD64SETO) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) + v1.AddArg2(x, y) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Select1 (Add64carry x y c)) + // result: (MOVBQZX (SETB (Select1 (ADCQ x y (Select1 (NEGLflags c)))))) + for { + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpAMD64MOVBQZX) + v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8]) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v4.AddArg(c) + v3.AddArg(v4) + v2.AddArg3(x, y, v3) + v1.AddArg(v2) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Select1 (Sub64borrow x y c)) + // result: (MOVBQZX (SETB (Select1 (SBBQ x y (Select1 (NEGLflags c)))))) + for { + if v_0.Op != OpSub64borrow { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpAMD64MOVBQZX) + v0 := b.NewValue0(v.Pos, OpAMD64SETB, types.Types[types.TUINT8]) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v4.AddArg(c) + v3.AddArg(v4) + v2.AddArg3(x, y, v3) + v1.AddArg(v2) + 
v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Select1 (NEGLflags (MOVQconst [0]))) + // result: (FlagEQ) + for { + if v_0.Op != OpAMD64NEGLflags { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 { + break + } + v.reset(OpAMD64FlagEQ) + return true + } + // match: (Select1 (NEGLflags (MOVBQZX (SETB x)))) + // result: x + for { + if v_0.Op != OpAMD64NEGLflags { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVBQZX { + break + } + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64SETB { + break + } + x := v_0_0_0.Args[0] + v.copyOf(x) + return true + } + // match: (Select1 (AddTupleFirst32 _ tuple)) + // result: (Select1 tuple) + for { + if v_0.Op != OpAMD64AddTupleFirst32 { + break + } + tuple := v_0.Args[1] + v.reset(OpSelect1) + v.AddArg(tuple) + return true + } + // match: (Select1 (AddTupleFirst64 _ tuple)) + // result: (Select1 tuple) + for { + if v_0.Op != OpAMD64AddTupleFirst64 { + break + } + tuple := v_0.Args[1] + v.reset(OpSelect1) + v.AddArg(tuple) + return true + } + // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ANDQlock ptr val mem) + for { + a := v_0 + if a.Op != OpAMD64LoweredAtomicAnd64 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ANDQlock) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ANDLlock ptr val mem) + for { + a := v_0 + if a.Op != OpAMD64LoweredAtomicAnd32 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ANDLlock) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Select1 a:(LoweredAtomicOr64 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ORQlock ptr val mem) + for { + a := v_0 + if a.Op != 
OpAMD64LoweredAtomicOr64 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ORQlock) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Select1 a:(LoweredAtomicOr32 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ORLlock ptr val mem) + for { + a := v_0 + if a.Op != OpAMD64LoweredAtomicOr32 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ORLlock) + v.AddArg3(ptr, val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpSelectN(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem))))) + // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call) + // result: (Move [sc.Val64()] dst src mem) + for { + if auxIntToInt64(v.AuxInt) != 0 { + break + } + call := v_0 + if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 { + break + } + sym := auxToCall(call.Aux) + s1 := call.Args[0] + if s1.Op != OpAMD64MOVQstoreconst { + break + } + sc := auxIntToValAndOff(s1.AuxInt) + _ = s1.Args[1] + s2 := s1.Args[1] + if s2.Op != OpAMD64MOVQstore { + break + } + _ = s2.Args[2] + src := s2.Args[1] + s3 := s2.Args[2] + if s3.Op != OpAMD64MOVQstore { + break + } + mem := s3.Args[2] + dst := s3.Args[1] + if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(sc.Val64()) + v.AddArg3(dst, src, mem) + return true + } + // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem)) + // cond: sz >= 0 && 
isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) + // result: (Move [sz] dst src mem) + for { + if auxIntToInt64(v.AuxInt) != 0 { + break + } + call := v_0 + if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 { + break + } + sym := auxToCall(call.Aux) + mem := call.Args[3] + dst := call.Args[0] + src := call.Args[1] + call_2 := call.Args[2] + if call_2.Op != OpAMD64MOVQconst { + break + } + sz := auxIntToInt64(call_2.AuxInt) + if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(sz) + v.AddArg3(dst, src, mem) + return true + } + return false +} +func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat32x16 x y) + // result: (VINSERTF64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat32x8 x y) + // result: (VINSERTF128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat64x4 x y) + // result: (VINSERTF128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat64x8 x y) + // result: (VINSERTF64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func 
rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt16x16 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt16x32 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt32x16 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt32x8 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt64x4 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt64x8 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt8x32 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + 
v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt8x64 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint16x16 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint16x32 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint32x16 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint32x8 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint64x4 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(SetHiUint64x8 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint8x32 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint8x64 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat32x16 x y) + // result: (VINSERTF64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat32x8 x y) + // result: (VINSERTF128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat64x4 x y) + // result: (VINSERTF128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat64x8 x y) + // result: (VINSERTF64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func 
rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt16x16 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt16x32 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt32x16 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt32x8 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt64x4 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt64x8 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt8x32 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + 
v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt8x64 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint16x16 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint16x32 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint32x16 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint32x8 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint64x4 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(SetLoUint64x8 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint8x32 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint8x64 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSlicemask(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (Slicemask x) + // result: (SARQconst (NEGQ x) [63]) + for { + t := v.Type + x := v_0 + v.reset(OpAMD64SARQconst) + v.AuxInt = int8ToAuxInt(63) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpSpectreIndex(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SpectreIndex x y) + // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64CMOVQCC) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v1.AddArg2(x, y) + v.AddArg3(x, v0, v1) + return true + } +} +func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SpectreSliceIndex x y) + // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64CMOVQHI) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := 
b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v1.AddArg2(x, y) + v.AddArg3(x, v0, v1) + return true + } +} +func rewriteValueAMD64_OpStore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Store {t} ptr val mem) + // cond: t.Size() == 8 && t.IsFloat() + // result: (MOVSDstore ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 8 && t.IsFloat()) { + break + } + v.reset(OpAMD64MOVSDstore) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 4 && t.IsFloat() + // result: (MOVSSstore ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 4 && t.IsFloat()) { + break + } + v.reset(OpAMD64MOVSSstore) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 8 && !t.IsFloat() + // result: (MOVQstore ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 8 && !t.IsFloat()) { + break + } + v.reset(OpAMD64MOVQstore) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 4 && !t.IsFloat() + // result: (MOVLstore ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 4 && !t.IsFloat()) { + break + } + v.reset(OpAMD64MOVLstore) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 2 + // result: (MOVWstore ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 2) { + break + } + v.reset(OpAMD64MOVWstore) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 1 + // result: (MOVBstore ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 1) { + break + } + v.reset(OpAMD64MOVBstore) + v.AddArg3(ptr, val, mem) + 
return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 16 + // result: (VMOVDQUstore128 ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VMOVDQUstore128) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 32 + // result: (VMOVDQUstore256 ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VMOVDQUstore256) + v.AddArg3(ptr, val, mem) + return true + } + // match: (Store {t} ptr val mem) + // cond: t.Size() == 64 + // result: (VMOVDQUstore512 ptr val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VMOVDQUstore512) + v.AddArg3(ptr, val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpStoreMasked16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (StoreMasked16 {t} ptr mask val mem) + // cond: t.Size() == 64 + // result: (VPMASK16store512 ptr (VPMOVVec16x32ToM mask) val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK16store512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(ptr, v0, val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpStoreMasked32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (StoreMasked32 {t} ptr mask val mem) + // cond: t.Size() == 16 + // result: (VPMASK32store128 ptr mask val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VPMASK32store128) + v.AddArg4(ptr, mask, val, mem) + return true + } + // 
match: (StoreMasked32 {t} ptr mask val mem) + // cond: t.Size() == 32 + // result: (VPMASK32store256 ptr mask val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VPMASK32store256) + v.AddArg4(ptr, mask, val, mem) + return true + } + // match: (StoreMasked32 {t} ptr mask val mem) + // cond: t.Size() == 64 + // result: (VPMASK32store512 ptr (VPMOVVec32x16ToM mask) val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK32store512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(ptr, v0, val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpStoreMasked64(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (StoreMasked64 {t} ptr mask val mem) + // cond: t.Size() == 16 + // result: (VPMASK64store128 ptr mask val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VPMASK64store128) + v.AddArg4(ptr, mask, val, mem) + return true + } + // match: (StoreMasked64 {t} ptr mask val mem) + // cond: t.Size() == 32 + // result: (VPMASK64store256 ptr mask val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VPMASK64store256) + v.AddArg4(ptr, mask, val, mem) + return true + } + // match: (StoreMasked64 {t} ptr mask val mem) + // cond: t.Size() == 64 + // result: (VPMASK64store512 ptr (VPMOVVec64x8ToM mask) val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK64store512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(ptr, v0, 
val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpStoreMasked8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (StoreMasked8 {t} ptr mask val mem) + // cond: t.Size() == 64 + // result: (VPMASK8store512 ptr (VPMOVVec8x64ToM mask) val mem) + for { + t := auxToType(v.Aux) + ptr := v_0 + mask := v_1 + val := v_2 + mem := v_3 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VPMASK8store512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(ptr, v0, val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpTrunc(v *Value) bool { + v_0 := v.Args[0] + // match: (Trunc x) + // result: (ROUNDSD [3] x) + for { + x := v_0 + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncFloat32x4 x) + // result: (VROUNDPS128 [3] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncFloat32x8 x) + // result: (VROUNDPS256 [3] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncFloat64x2 x) + // result: (VROUNDPD128 [3] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncFloat64x4 x) + // result: (VROUNDPD256 [3] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = uint8ToAuxInt(3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: 
(TruncScaledFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledResidueFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+3] x) + for { + a := 
auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledResidueFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledResidueFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledResidueFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledResidueFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledResidueFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledResidueFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledResidueFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (TruncScaledResidueFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+3] x) + for { + a := auxIntToUint8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = uint8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpZero(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Zero [0] _ mem) + // result: mem + for { + if auxIntToInt64(v.AuxInt) != 0 { + break + } + mem := 
v_1 + v.copyOf(mem) + return true + } + // match: (Zero [1] destptr mem) + // result: (MOVBstoreconst [makeValAndOff(0,0)] destptr mem) + for { + if auxIntToInt64(v.AuxInt) != 1 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v.AddArg2(destptr, mem) + return true + } + // match: (Zero [2] destptr mem) + // result: (MOVWstoreconst [makeValAndOff(0,0)] destptr mem) + for { + if auxIntToInt64(v.AuxInt) != 2 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v.AddArg2(destptr, mem) + return true + } + // match: (Zero [4] destptr mem) + // result: (MOVLstoreconst [makeValAndOff(0,0)] destptr mem) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v.AddArg2(destptr, mem) + return true + } + // match: (Zero [8] destptr mem) + // result: (MOVQstoreconst [makeValAndOff(0,0)] destptr mem) + for { + if auxIntToInt64(v.AuxInt) != 8 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v.AddArg2(destptr, mem) + return true + } + // match: (Zero [3] destptr mem) + // result: (MOVBstoreconst [makeValAndOff(0,2)] destptr (MOVWstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 3 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 2)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [5] destptr mem) + // result: (MOVBstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 5 { + break + } + 
destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 4)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [6] destptr mem) + // result: (MOVWstoreconst [makeValAndOff(0,4)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 6 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 4)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [7] destptr mem) + // result: (MOVLstoreconst [makeValAndOff(0,3)] destptr (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 7 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 3)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [9] destptr mem) + // result: (MOVBstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 9 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [10] destptr mem) + // result: (MOVWstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 10 { 
+ break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [11] destptr mem) + // result: (MOVLstoreconst [makeValAndOff(0,7)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 11 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 7)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [12] destptr mem) + // result: (MOVLstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + if auxIntToInt64(v.AuxInt) != 12 { + break + } + destptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [s] destptr mem) + // cond: s > 12 && s < 16 + // result: (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + s := auxIntToInt64(v.AuxInt) + destptr := v_0 + mem := v_1 + if !(s > 12 && s < 16) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, int32(s-8))) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem) + v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v0.AddArg2(destptr, mem) + v.AddArg2(destptr, v0) + return true + } + // match: (Zero [s] destptr mem) + // cond: s >= 16 && s < 192 + // result: (LoweredZero [s] destptr 
mem) + for { + s := auxIntToInt64(v.AuxInt) + destptr := v_0 + mem := v_1 + if !(s >= 16 && s < 192) { + break + } + v.reset(OpAMD64LoweredZero) + v.AuxInt = int64ToAuxInt(s) + v.AddArg2(destptr, mem) + return true + } + // match: (Zero [s] destptr mem) + // cond: s >= 192 && s <= repZeroThreshold + // result: (LoweredZeroLoop [s] destptr mem) + for { + s := auxIntToInt64(v.AuxInt) + destptr := v_0 + mem := v_1 + if !(s >= 192 && s <= repZeroThreshold) { + break + } + v.reset(OpAMD64LoweredZeroLoop) + v.AuxInt = int64ToAuxInt(s) + v.AddArg2(destptr, mem) + return true + } + // match: (Zero [s] destptr mem) + // cond: s > repZeroThreshold && s%8 != 0 + // result: (Zero [s-s%8] (OffPtr destptr [s%8]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)) + for { + s := auxIntToInt64(v.AuxInt) + destptr := v_0 + mem := v_1 + if !(s > repZeroThreshold && s%8 != 0) { + break + } + v.reset(OpZero) + v.AuxInt = int64ToAuxInt(s - s%8) + v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type) + v0.AuxInt = int64ToAuxInt(s % 8) + v0.AddArg(destptr) + v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem) + v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) + v1.AddArg2(destptr, mem) + v.AddArg2(v0, v1) + return true + } + // match: (Zero [s] destptr mem) + // cond: s > repZeroThreshold && s%8 == 0 + // result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem) + for { + s := auxIntToInt64(v.AuxInt) + destptr := v_0 + mem := v_1 + if !(s > repZeroThreshold && s%8 == 0) { + break + } + v.reset(OpAMD64REPSTOSQ) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(s / 8) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v1.AuxInt = int64ToAuxInt(0) + v.AddArg4(destptr, v0, v1, mem) + return true + } + return false +} +func rewriteValueAMD64_OpZeroSIMD(v *Value) bool { + // match: (ZeroSIMD ) + // cond: t.Size() == 16 + // result: (Zero128 ) + for { + t := v.Type + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64Zero128) + v.Type = 
t + return true + } + // match: (ZeroSIMD ) + // cond: t.Size() == 32 + // result: (Zero256 ) + for { + t := v.Type + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64Zero256) + v.Type = t + return true + } + // match: (ZeroSIMD ) + // cond: t.Size() == 64 + // result: (Zero512 ) + for { + t := v.Type + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64Zero512) + v.Type = t + return true + } + return false +} +func rewriteValueAMD64_OpblendMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (blendMaskedInt16x32 x y mask) + // result: (VPBLENDMWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPBLENDMWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpblendMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (blendMaskedInt32x16 x y mask) + // result: (VPBLENDMDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPBLENDMDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpblendMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (blendMaskedInt64x8 x y mask) + // result: (VPBLENDMQMasked512 x y (VPMOVVec64x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPBLENDMQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpblendMaskedInt8x64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (blendMaskedInt8x64 x y mask) + // result: (VPBLENDMBMasked512 x y (VPMOVVec8x64ToM 
mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPBLENDMBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteBlockAMD64(b *Block) bool { + typ := &b.Func.Config.Types + switch b.Kind { + case BlockAMD64EQ: + // match: (EQ (TESTL (SHLL (MOVLconst [1]) x) y)) + // result: (UGE (BTL x y)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTQ (SHLQ (MOVQconst [1]) x) y)) + // result: (UGE (BTQ x y)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTLconst [c] x)) + // cond: isPowerOfTwo(uint32(c)) + // result: (UGE (BTLconst [int8(log32u(uint32(c)))] x)) + for b.Controls[0].Op == OpAMD64TESTLconst { + v_0 := b.Controls[0] + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint32(c))) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + 
b.resetWithControl(BlockAMD64UGE, v0) + return true + } + // match: (EQ (TESTQconst [c] x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (UGE (BTQconst [int8(log32u(uint32(c)))] x)) + for b.Controls[0].Op == OpAMD64TESTQconst { + v_0 := b.Controls[0] + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint64(c))) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + // match: (EQ (TESTQ (MOVQconst [c]) x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (UGE (BTQconst [int8(log64u(uint64(c)))] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(isPowerOfTwo(uint64(c))) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) + // cond: z1==z2 + // result: (UGE (BTQconst [63] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: 
(EQ (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) + // cond: z1==z2 + // result: (UGE (BTQconst [31] x)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) + // cond: z1==z2 + // result: (UGE (BTQconst [0] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) + // cond: z1==z2 + // result: (UGE (BTLconst [0] x)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLLconst || 
auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTQ z1:(SHRQconst [63] x) z2)) + // cond: z1==z2 + // result: (UGE (BTQconst [63] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (TESTL z1:(SHRLconst [31] x) z2)) + // cond: z1==z2 + // result: (UGE (BTLconst [31] x)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + b.resetWithControl(BlockAMD64UGE, v0) + return true + } + break + } + // match: (EQ (InvertFlags cmp) yes no) + // result: (EQ cmp yes no) + for b.Controls[0].Op == OpAMD64InvertFlags { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64EQ, cmp) + return true + } + // match: (EQ (FlagEQ) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagEQ { + b.Reset(BlockFirst) + return true + } + // match: (EQ (FlagLT_ULT) yes no) + // result: (First no 
yes) + for b.Controls[0].Op == OpAMD64FlagLT_ULT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (EQ (FlagLT_UGT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagLT_UGT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (EQ (FlagGT_ULT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagGT_ULT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (EQ (FlagGT_UGT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagGT_UGT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (EQ (TESTQ s:(Select0 blsr:(BLSRQ _)) s) yes no) + // result: (EQ (Select1 blsr) yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { + continue + } + v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ (TESTL s:(Select0 blsr:(BLSRL _)) s) yes no) + // result: (EQ (Select1 blsr) yes no) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRL || s != v_0_1 { + continue + } + v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ t:(TESTQ a:(ADDQconst [c] x) a)) + // cond: t.Uses == 1 && flagify(a) + // result: (EQ (Select1 a.Args[0])) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + 
t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + a := t_0 + if a.Op != OpAMD64ADDQconst { + continue + } + if a != t_1 || !(t.Uses == 1 && flagify(a)) { + continue + } + v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(a.Args[0]) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ t:(TESTL a:(ADDLconst [c] x) a)) + // cond: t.Uses == 1 && flagify(a) + // result: (EQ (Select1 a.Args[0])) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + a := t_0 + if a.Op != OpAMD64ADDLconst { + continue + } + if a != t_1 || !(t.Uses == 1 && flagify(a)) { + continue + } + v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(a.Args[0]) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ (VPTEST x:(VPAND128 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (EQ (VPTEST j k) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPAND128 { + break + } + _ = x.Args[1] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { + j := x_0 + k := x_1 + if !(x == y && x.Uses == 2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ (VPTEST x:(VPAND256 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (EQ (VPTEST j k) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPAND256 { + break + } + _ = x.Args[1] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { + j := x_0 + k := x_1 + if !(x == y && x.Uses == 2) { + continue + } + v0 := b.NewValue0(v_0.Pos, 
OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ (VPTEST x:(VPANDD512 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (EQ (VPTEST j k) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDD512 { + break + } + _ = x.Args[1] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { + j := x_0 + k := x_1 + if !(x == y && x.Uses == 2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ (VPTEST x:(VPANDQ512 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (EQ (VPTEST j k) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDQ512 { + break + } + _ = x.Args[1] + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { + j := x_0 + k := x_1 + if !(x == y && x.Uses == 2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(j, k) + b.resetWithControl(BlockAMD64EQ, v0) + return true + } + break + } + // match: (EQ (VPTEST x:(VPANDN128 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (ULT (VPTEST k j) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDN128 { + break + } + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + // match: (EQ (VPTEST x:(VPANDN256 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (ULT (VPTEST k j) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := 
b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDN256 { + break + } + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + // match: (EQ (VPTEST x:(VPANDND512 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (ULT (VPTEST k j) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDND512 { + break + } + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + // match: (EQ (VPTEST x:(VPANDNQ512 j k) y) yes no) + // cond: x == y && x.Uses == 2 + // result: (ULT (VPTEST k j) yes no) + for b.Controls[0].Op == OpAMD64VPTEST { + v_0 := b.Controls[0] + y := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpAMD64VPANDNQ512 { + break + } + k := x.Args[1] + j := x.Args[0] + if !(x == y && x.Uses == 2) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) + v0.AddArg2(k, j) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NE, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for 
b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQ, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETL flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GE, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETG flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LE, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := 
s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LT, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETA flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETB flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == 
OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) + return true + } + break + } + // match: (EQ t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true + } + break + } + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NE, flags) + return true + } + break + } + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETNE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != 
t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQ, flags) + return true + } + break + } + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETL flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GE, flags) + return true + } + break + } + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETG flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LE, flags) + return true + } + break + } + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETLE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true + } + break + } + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETGE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] 
+ _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LT, flags) + return true + } break } - v.reset(OpAMD64LoweredZeroLoop) - v.AuxInt = int64ToAuxInt(s) - v.AddArg2(destptr, mem) - return true - } - // match: (Zero [s] destptr mem) - // cond: s > repZeroThreshold && s%8 != 0 - // result: (Zero [s-s%8] (OffPtr destptr [s%8]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)) - for { - s := auxIntToInt64(v.AuxInt) - destptr := v_0 - mem := v_1 - if !(s > repZeroThreshold && s%8 != 0) { + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETA flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) + return true + } break } - v.reset(OpZero) - v.AuxInt = int64ToAuxInt(s - s%8) - v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type) - v0.AuxInt = int64ToAuxInt(s % 8) - v0.AddArg(destptr) - v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem) - v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0)) - v1.AddArg2(destptr, mem) - v.AddArg2(v0, v1) - return true - } - // match: (Zero [s] destptr mem) - // cond: s > repZeroThreshold && s%8 == 0 - // result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem) - for { - s := auxIntToInt64(v.AuxInt) - destptr := v_0 - mem := v_1 - if !(s > repZeroThreshold && s%8 == 0) { + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETB flags)) x) 
yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true + } break } - v.reset(OpAMD64REPSTOSQ) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(s / 8) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v1.AuxInt = int64ToAuxInt(0) - v.AddArg4(destptr, v0, v1, mem) - return true - } - return false -} -func rewriteValueAMD64_OpZeroSIMD(v *Value) bool { - // match: (ZeroSIMD ) - // cond: t.Size() == 16 - // result: (Zero128 ) - for { - t := v.Type - if !(t.Size() == 16) { + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETAE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) + return true + } break } - v.reset(OpAMD64Zero128) - v.Type = t - return true - } - // match: (ZeroSIMD ) - // cond: t.Size() == 32 - // result: (Zero256 ) - for { - t := v.Type - if !(t.Size() == 32) { + // match: (EQ t:(TESTL x:(MOVBQZX s:(SETBE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 
{ + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true + } break } - v.reset(OpAMD64Zero256) - v.Type = t - return true - } - // match: (ZeroSIMD ) - // cond: t.Size() == 64 - // result: (Zero512 ) - for { - t := v.Type - if !(t.Size() == 64) { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NE, flags) + return true + } break } - v.reset(OpAMD64Zero512) - v.Type = t - return true - } - return false -} -func rewriteValueAMD64_OpblendMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (blendMaskedInt16x32 x y mask) - // result: (VPBLENDMWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPBLENDMWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpblendMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (blendMaskedInt32x16 x y mask) - // result: (VPBLENDMDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPBLENDMDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpblendMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (blendMaskedInt64x8 x y mask) - // result: (VPBLENDMQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPBLENDMQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpblendMaskedInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (blendMaskedInt8x64 x y mask) - // result: (VPBLENDMBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPBLENDMBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteBlockAMD64(b *Block) bool { - typ := &b.Func.Config.Types - switch b.Kind { - case BlockAMD64EQ: - // match: (EQ (TESTL (SHLL (MOVLconst [1]) x) y)) - // result: (UGE (BTL x y)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLL { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETNE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + s := x.Args[0] + if s.Op != OpAMD64SETNE { continue } - y := v_0_1 - v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags) - v0.AddArg2(x, y) - b.resetWithControl(BlockAMD64UGE, v0) + flags := s.Args[0] + if x != 
t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQ, flags) return true } break } - // match: (EQ (TESTQ (SHLQ (MOVQconst [1]) x) y)) - // result: (UGE (BTQ x y)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLQ { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETL flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + s := x.Args[0] + if s.Op != OpAMD64SETL { continue } - y := v_0_1 - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags) - v0.AddArg2(x, y) - b.resetWithControl(BlockAMD64UGE, v0) + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GE, flags) return true } break } - // match: (EQ (TESTLconst [c] x)) - // cond: isPowerOfTwo(uint32(c)) - // result: (UGE (BTLconst [int8(log32u(uint32(c)))] x)) - for b.Controls[0].Op == OpAMD64TESTLconst { - v_0 := b.Controls[0] - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(isPowerOfTwo(uint32(c))) { - break + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETG flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue 
+ } + b.resetWithControl(BlockAMD64LE, flags) + return true } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) - return true + break } - // match: (EQ (TESTQconst [c] x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (UGE (BTQconst [int8(log32u(uint32(c)))] x)) - for b.Controls[0].Op == OpAMD64TESTQconst { - v_0 := b.Controls[0] - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(isPowerOfTwo(uint64(c))) { - break + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETLE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) - return true + break } - // match: (EQ (TESTQ (MOVQconst [c]) x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (UGE (BTQconst [int8(log64u(uint64(c)))] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64MOVQconst { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETGE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != 
OpAMD64MOVBQZX { continue } - c := auxIntToInt64(v_0_0.AuxInt) - x := v_0_1 - if !(isPowerOfTwo(uint64(c))) { + s := x.Args[0] + if s.Op != OpAMD64SETGE { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LT, flags) return true } break } - // match: (EQ (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) - // cond: z1==z2 - // result: (UGE (BTQconst [63] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETA flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + s := x.Args[0] + if s.Op != OpAMD64SETA { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + b.resetWithControl(BlockAMD64ULE, flags) return true } break } - // match: (EQ (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) - // cond: z1==z2 - // result: (UGE (BTQconst [31] x)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, 
v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETB flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { + s := x.Args[0] + if s.Op != OpAMD64SETB { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + b.resetWithControl(BlockAMD64UGE, flags) return true } break } - // match: (EQ (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) - // cond: z1==z2 - // result: (UGE (BTQconst [0] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETAE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + s := x.Args[0] + if s.Op != OpAMD64SETAE { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := 
b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + b.resetWithControl(BlockAMD64ULT, flags) return true } break } - // match: (EQ (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) - // cond: z1==z2 - // result: (UGE (BTLconst [0] x)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + // match: (EQ t:(TESTW x:(MOVBQZX s:(SETBE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + s := x.Args[0] + if s.Op != OpAMD64SETBE { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + b.resetWithControl(BlockAMD64UGT, flags) return true } break } - // match: (EQ (TESTQ z1:(SHRQconst [63] x) z2)) - // cond: z1==z2 - // result: (UGE (BTQconst [63] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + // match: (EQ t:(TESTB s:(SETEQ flags) s) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + 
_ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { continue } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + b.resetWithControl(BlockAMD64NE, flags) return true } break } - // match: (EQ (TESTL z1:(SHRLconst [31] x) z2)) - // cond: z1==z2 - // result: (UGE (BTLconst [31] x)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + // match: (EQ t:(TESTB s:(SETNE flags) s) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { continue } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - b.resetWithControl(BlockAMD64UGE, v0) + b.resetWithControl(BlockAMD64EQ, flags) return true } break } - // match: (EQ (InvertFlags cmp) yes no) - // result: (EQ cmp yes no) - for b.Controls[0].Op == OpAMD64InvertFlags { - v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64EQ, cmp) - return true - } - // match: (EQ (FlagEQ) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagEQ { - b.Reset(BlockFirst) - return true - } - // match: (EQ (FlagLT_ULT) yes no) - // result: (First no yes) - for 
b.Controls[0].Op == OpAMD64FlagLT_ULT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true - } - // match: (EQ (FlagLT_UGT) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagLT_UGT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true - } - // match: (EQ (FlagGT_ULT) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagGT_ULT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true - } - // match: (EQ (FlagGT_UGT) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagGT_UGT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true - } - // match: (EQ (TESTQ s:(Select0 blsr:(BLSRQ _)) s) yes no) - // result: (EQ (Select1 blsr) yes no) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { + // match: (EQ t:(TESTB s:(SETL flags) s) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { continue } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - b.resetWithControl(BlockAMD64EQ, v0) + b.resetWithControl(BlockAMD64GE, flags) return true } break } - // match: (EQ (TESTL s:(Select0 blsr:(BLSRL _)) s) yes no) - // result: (EQ (Select1 blsr) yes no) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { + // match: (EQ t:(TESTB s:(SETG flags) s) yes no) + // 
cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { continue } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRL || s != v_0_1 { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - b.resetWithControl(BlockAMD64EQ, v0) + b.resetWithControl(BlockAMD64LE, flags) return true } break } - // match: (EQ t:(TESTQ a:(ADDQconst [c] x) a)) - // cond: t.Uses == 1 && flagify(a) - // result: (EQ (Select1 a.Args[0])) - for b.Controls[0].Op == OpAMD64TESTQ { + // match: (EQ t:(TESTB s:(SETLE flags) s) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { t := b.Controls[0] _ = t.Args[1] t_0 := t.Args[0] t_1 := t.Args[1] for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { - a := t_0 - if a.Op != OpAMD64ADDQconst { + s := t_0 + if s.Op != OpAMD64SETLE { continue } - if a != t_1 || !(t.Uses == 1 && flagify(a)) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(a.Args[0]) - b.resetWithControl(BlockAMD64EQ, v0) + b.resetWithControl(BlockAMD64GT, flags) return true } break } - // match: (EQ t:(TESTL a:(ADDLconst [c] x) a)) - // cond: t.Uses == 1 && flagify(a) - // result: (EQ (Select1 a.Args[0])) - for b.Controls[0].Op == OpAMD64TESTL { + // match: (EQ t:(TESTB s:(SETGE flags) s) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { t := b.Controls[0] _ = t.Args[1] t_0 := t.Args[0] t_1 := t.Args[1] for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { - a := t_0 - if a.Op != OpAMD64ADDLconst { + s := t_0 + if s.Op != OpAMD64SETGE { continue } - if a != t_1 || !(t.Uses 
== 1 && flagify(a)) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(a.Args[0]) - b.resetWithControl(BlockAMD64EQ, v0) + b.resetWithControl(BlockAMD64LT, flags) return true } break } - // match: (EQ (VPTEST x:(VPAND128 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (EQ (VPTEST j k) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPAND128 { - break - } - _ = x.Args[1] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { - j := x_0 - k := x_1 - if !(x == y && x.Uses == 2) { + // match: (EQ t:(TESTB s:(SETA flags) s) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - b.resetWithControl(BlockAMD64EQ, v0) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) return true } break } - // match: (EQ (VPTEST x:(VPAND256 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (EQ (VPTEST j k) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPAND256 { - break - } - _ = x.Args[1] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { - j := x_0 - k := x_1 - if !(x == y && x.Uses == 2) { + // match: (EQ t:(TESTB s:(SETB flags) s) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 
<= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - b.resetWithControl(BlockAMD64EQ, v0) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) return true } break } - // match: (EQ (VPTEST x:(VPANDD512 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (EQ (VPTEST j k) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDD512 { - break - } - _ = x.Args[1] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { - j := x_0 - k := x_1 - if !(x == y && x.Uses == 2) { + // match: (EQ t:(TESTB s:(SETAE flags) s) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - b.resetWithControl(BlockAMD64EQ, v0) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) return true } break } - // match: (EQ (VPTEST x:(VPANDQ512 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (EQ (VPTEST j k) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDQ512 { - break - } - _ = x.Args[1] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i0 := 0; _i0 <= 1; _i0, x_0, x_1 = _i0+1, x_1, x_0 { - j := x_0 - k := x_1 - if !(x == y && x.Uses == 2) { + // match: (EQ t:(TESTB s:(SETBE flags) s) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := 
b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(j, k) - b.resetWithControl(BlockAMD64EQ, v0) + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) return true } break } - // match: (EQ (VPTEST x:(VPANDN128 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (ULT (VPTEST k j) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDN128 { + case BlockAMD64GE: + // match: (GE c:(CMPQconst [128] z) yes no) + // cond: c.Uses == 1 + // result: (GT (CMPQconst [127] z) yes no) + for b.Controls[0].Op == OpAMD64CMPQconst { + c := b.Controls[0] + if auxIntToInt32(c.AuxInt) != 128 { break } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { + z := c.Args[0] + if !(c.Uses == 1) { break } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - b.resetWithControl(BlockAMD64ULT, v0) + v0 := b.NewValue0(c.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(z) + b.resetWithControl(BlockAMD64GT, v0) + return true + } + // match: (GE c:(CMPLconst [128] z) yes no) + // cond: c.Uses == 1 + // result: (GT (CMPLconst [127] z) yes no) + for b.Controls[0].Op == OpAMD64CMPLconst { + c := b.Controls[0] + if auxIntToInt32(c.AuxInt) != 128 { + break + } + z := c.Args[0] + if !(c.Uses == 1) { + break + } + v0 := b.NewValue0(c.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(127) + v0.AddArg(z) + b.resetWithControl(BlockAMD64GT, v0) + return true + } + // match: (GE (InvertFlags cmp) yes no) + // result: (LE cmp yes no) + for b.Controls[0].Op == OpAMD64InvertFlags { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64LE, cmp) + 
return true + } + // match: (GE (FlagEQ) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagEQ { + b.Reset(BlockFirst) + return true + } + // match: (GE (FlagLT_ULT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagLT_ULT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (GE (FlagLT_UGT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagLT_UGT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (GE (FlagGT_ULT) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagGT_ULT { + b.Reset(BlockFirst) + return true + } + // match: (GE (FlagGT_UGT) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagGT_UGT { + b.Reset(BlockFirst) + return true + } + case BlockAMD64GT: + // match: (GT (InvertFlags cmp) yes no) + // result: (LT cmp yes no) + for b.Controls[0].Op == OpAMD64InvertFlags { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64LT, cmp) + return true + } + // match: (GT (FlagEQ) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagEQ { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (GT (FlagLT_ULT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagLT_ULT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (GT (FlagLT_UGT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagLT_UGT { + b.Reset(BlockFirst) + b.swapSuccessors() + return true + } + // match: (GT (FlagGT_ULT) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagGT_ULT { + b.Reset(BlockFirst) + return true + } + // match: (GT (FlagGT_UGT) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagGT_UGT { + b.Reset(BlockFirst) + return true + } + case BlockIf: + // match: (If (SETL cmp) yes no) + // result: (LT cmp yes no) + for b.Controls[0].Op == OpAMD64SETL { + v_0 := b.Controls[0] 
+ cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64LT, cmp) + return true + } + // match: (If (SETLE cmp) yes no) + // result: (LE cmp yes no) + for b.Controls[0].Op == OpAMD64SETLE { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64LE, cmp) + return true + } + // match: (If (SETG cmp) yes no) + // result: (GT cmp yes no) + for b.Controls[0].Op == OpAMD64SETG { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64GT, cmp) + return true + } + // match: (If (SETGE cmp) yes no) + // result: (GE cmp yes no) + for b.Controls[0].Op == OpAMD64SETGE { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64GE, cmp) + return true + } + // match: (If (SETEQ cmp) yes no) + // result: (EQ cmp yes no) + for b.Controls[0].Op == OpAMD64SETEQ { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64EQ, cmp) + return true + } + // match: (If (SETNE cmp) yes no) + // result: (NE cmp yes no) + for b.Controls[0].Op == OpAMD64SETNE { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64NE, cmp) + return true + } + // match: (If (SETB cmp) yes no) + // result: (ULT cmp yes no) + for b.Controls[0].Op == OpAMD64SETB { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64ULT, cmp) + return true + } + // match: (If (SETBE cmp) yes no) + // result: (ULE cmp yes no) + for b.Controls[0].Op == OpAMD64SETBE { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64ULE, cmp) + return true + } + // match: (If (SETA cmp) yes no) + // result: (UGT cmp yes no) + for b.Controls[0].Op == OpAMD64SETA { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64UGT, cmp) + return true + } + // match: (If (SETAE cmp) yes no) + // result: (UGE cmp yes no) + for b.Controls[0].Op == OpAMD64SETAE { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64UGE, cmp) + return true + } + // match: (If (SETO cmp) yes no) + // 
result: (OS cmp yes no) + for b.Controls[0].Op == OpAMD64SETO { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64OS, cmp) + return true + } + // match: (If (SETGF cmp) yes no) + // result: (UGT cmp yes no) + for b.Controls[0].Op == OpAMD64SETGF { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64UGT, cmp) + return true + } + // match: (If (SETGEF cmp) yes no) + // result: (UGE cmp yes no) + for b.Controls[0].Op == OpAMD64SETGEF { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64UGE, cmp) + return true + } + // match: (If (SETEQF cmp) yes no) + // result: (EQF cmp yes no) + for b.Controls[0].Op == OpAMD64SETEQF { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64EQF, cmp) + return true + } + // match: (If (SETNEF cmp) yes no) + // result: (NEF cmp yes no) + for b.Controls[0].Op == OpAMD64SETNEF { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64NEF, cmp) + return true + } + // match: (If cond yes no) + // result: (NE (TESTB cond cond) yes no) + for { + cond := b.Controls[0] + v0 := b.NewValue0(cond.Pos, OpAMD64TESTB, types.TypeFlags) + v0.AddArg2(cond, cond) + b.resetWithControl(BlockAMD64NE, v0) + return true + } + case BlockJumpTable: + // match: (JumpTable idx) + // result: (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ {makeJumpTableSym(b)} (SB))) + for { + idx := b.Controls[0] + v0 := b.NewValue0(b.Pos, OpAMD64LEAQ, typ.Uintptr) + v0.Aux = symToAux(makeJumpTableSym(b)) + v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr) + v0.AddArg(v1) + b.resetWithControl2(BlockAMD64JUMPTABLE, idx, v0) + b.Aux = symToAux(makeJumpTableSym(b)) + return true + } + case BlockAMD64LE: + // match: (LE (InvertFlags cmp) yes no) + // result: (GE cmp yes no) + for b.Controls[0].Op == OpAMD64InvertFlags { + v_0 := b.Controls[0] + cmp := v_0.Args[0] + b.resetWithControl(BlockAMD64GE, cmp) + return true + } + // match: (LE (FlagEQ) yes no) + // result: (First yes no) + for 
b.Controls[0].Op == OpAMD64FlagEQ { + b.Reset(BlockFirst) + return true + } + // match: (LE (FlagLT_ULT) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagLT_ULT { + b.Reset(BlockFirst) return true } - // match: (EQ (VPTEST x:(VPANDN256 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (ULT (VPTEST k j) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDN256 { - break - } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { - break - } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - b.resetWithControl(BlockAMD64ULT, v0) + // match: (LE (FlagLT_UGT) yes no) + // result: (First yes no) + for b.Controls[0].Op == OpAMD64FlagLT_UGT { + b.Reset(BlockFirst) return true } - // match: (EQ (VPTEST x:(VPANDND512 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (ULT (VPTEST k j) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDND512 { - break - } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { - break - } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - b.resetWithControl(BlockAMD64ULT, v0) + // match: (LE (FlagGT_ULT) yes no) + // result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagGT_ULT { + b.Reset(BlockFirst) + b.swapSuccessors() return true } - // match: (EQ (VPTEST x:(VPANDNQ512 j k) y) yes no) - // cond: x == y && x.Uses == 2 - // result: (ULT (VPTEST k j) yes no) - for b.Controls[0].Op == OpAMD64VPTEST { - v_0 := b.Controls[0] - y := v_0.Args[1] - x := v_0.Args[0] - if x.Op != OpAMD64VPANDNQ512 { - break - } - k := x.Args[1] - j := x.Args[0] - if !(x == y && x.Uses == 2) { - break - } - v0 := b.NewValue0(v_0.Pos, OpAMD64VPTEST, types.TypeFlags) - v0.AddArg2(k, j) - b.resetWithControl(BlockAMD64ULT, v0) + // match: (LE (FlagGT_UGT) yes no) + // 
result: (First no yes) + for b.Controls[0].Op == OpAMD64FlagGT_UGT { + b.Reset(BlockFirst) + b.swapSuccessors() return true } - case BlockAMD64GE: - // match: (GE c:(CMPQconst [128] z) yes no) + case BlockAMD64LT: + // match: (LT c:(CMPQconst [128] z) yes no) // cond: c.Uses == 1 - // result: (GT (CMPQconst [127] z) yes no) + // result: (LE (CMPQconst [127] z) yes no) for b.Controls[0].Op == OpAMD64CMPQconst { c := b.Controls[0] if auxIntToInt32(c.AuxInt) != 128 { @@ -78409,12 +91312,12 @@ func rewriteBlockAMD64(b *Block) bool { v0 := b.NewValue0(c.Pos, OpAMD64CMPQconst, types.TypeFlags) v0.AuxInt = int32ToAuxInt(127) v0.AddArg(z) - b.resetWithControl(BlockAMD64GT, v0) + b.resetWithControl(BlockAMD64LE, v0) return true } - // match: (GE c:(CMPLconst [128] z) yes no) + // match: (LT c:(CMPLconst [128] z) yes no) // cond: c.Uses == 1 - // result: (GT (CMPLconst [127] z) yes no) + // result: (LE (CMPLconst [127] z) yes no) for b.Controls[0].Op == OpAMD64CMPLconst { c := b.Controls[0] if auxIntToInt32(c.AuxInt) != 128 { @@ -78427,1013 +91330,2110 @@ func rewriteBlockAMD64(b *Block) bool { v0 := b.NewValue0(c.Pos, OpAMD64CMPLconst, types.TypeFlags) v0.AuxInt = int32ToAuxInt(127) v0.AddArg(z) - b.resetWithControl(BlockAMD64GT, v0) + b.resetWithControl(BlockAMD64LE, v0) return true } - // match: (GE (InvertFlags cmp) yes no) - // result: (LE cmp yes no) + // match: (LT (InvertFlags cmp) yes no) + // result: (GT cmp yes no) for b.Controls[0].Op == OpAMD64InvertFlags { v_0 := b.Controls[0] cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64LE, cmp) - return true - } - // match: (GE (FlagEQ) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagEQ { - b.Reset(BlockFirst) - return true - } - // match: (GE (FlagLT_ULT) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagLT_ULT { - b.Reset(BlockFirst) - b.swapSuccessors() + b.resetWithControl(BlockAMD64GT, cmp) return true } - // match: (GE (FlagLT_UGT) yes no) + // match: (LT (FlagEQ) yes 
no) // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagLT_UGT { + for b.Controls[0].Op == OpAMD64FlagEQ { b.Reset(BlockFirst) b.swapSuccessors() return true } - // match: (GE (FlagGT_ULT) yes no) + // match: (LT (FlagLT_ULT) yes no) // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagGT_ULT { + for b.Controls[0].Op == OpAMD64FlagLT_ULT { b.Reset(BlockFirst) return true } - // match: (GE (FlagGT_UGT) yes no) + // match: (LT (FlagLT_UGT) yes no) // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagGT_UGT { - b.Reset(BlockFirst) - return true - } - case BlockAMD64GT: - // match: (GT (InvertFlags cmp) yes no) - // result: (LT cmp yes no) - for b.Controls[0].Op == OpAMD64InvertFlags { - v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64LT, cmp) - return true - } - // match: (GT (FlagEQ) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagEQ { + for b.Controls[0].Op == OpAMD64FlagLT_UGT { b.Reset(BlockFirst) - b.swapSuccessors() return true } - // match: (GT (FlagLT_ULT) yes no) + // match: (LT (FlagGT_ULT) yes no) // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagLT_ULT { + for b.Controls[0].Op == OpAMD64FlagGT_ULT { b.Reset(BlockFirst) b.swapSuccessors() return true } - // match: (GT (FlagLT_UGT) yes no) + // match: (LT (FlagGT_UGT) yes no) // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagLT_UGT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true - } - // match: (GT (FlagGT_ULT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagGT_ULT { - b.Reset(BlockFirst) - return true - } - // match: (GT (FlagGT_UGT) yes no) - // result: (First yes no) for b.Controls[0].Op == OpAMD64FlagGT_UGT { b.Reset(BlockFirst) + b.swapSuccessors() return true } - case BlockIf: - // match: (If (SETL cmp) yes no) + case BlockAMD64NE: + // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no) // result: (LT cmp yes no) - for b.Controls[0].Op == OpAMD64SETL { + for 
b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETL { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETL || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64LT, cmp) return true } - // match: (If (SETLE cmp) yes no) + // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) // result: (LE cmp yes no) - for b.Controls[0].Op == OpAMD64SETLE { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETLE { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETLE || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64LE, cmp) return true } - // match: (If (SETG cmp) yes no) + // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no) // result: (GT cmp yes no) - for b.Controls[0].Op == OpAMD64SETG { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETG { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETG || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64GT, cmp) return true } - // match: (If (SETGE cmp) yes no) + // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) // result: (GE cmp yes no) - for b.Controls[0].Op == OpAMD64SETGE { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETGE { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETGE || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64GE, cmp) return true } - // match: (If (SETEQ cmp) yes no) + // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) // result: (EQ cmp yes no) - for b.Controls[0].Op == OpAMD64SETEQ { + for b.Controls[0].Op == 
OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETEQ { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETEQ || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64EQ, cmp) return true } - // match: (If (SETNE cmp) yes no) + // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) // result: (NE cmp yes no) - for b.Controls[0].Op == OpAMD64SETNE { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETNE { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETNE || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64NE, cmp) return true } - // match: (If (SETB cmp) yes no) + // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no) // result: (ULT cmp yes no) - for b.Controls[0].Op == OpAMD64SETB { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETB { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETB || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64ULT, cmp) return true } - // match: (If (SETBE cmp) yes no) + // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) // result: (ULE cmp yes no) - for b.Controls[0].Op == OpAMD64SETBE { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETBE { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETBE || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64ULE, cmp) return true } - // match: (If (SETA cmp) yes no) - // result: (UGT cmp yes no) - for b.Controls[0].Op == OpAMD64SETA { + // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no) + // result: (UGT cmp yes no) + for b.Controls[0].Op 
== OpAMD64TESTB { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETA { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETA || cmp != v_0_1.Args[0] { + break + } + b.resetWithControl(BlockAMD64UGT, cmp) + return true + } + // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) + // result: (UGE cmp yes no) + for b.Controls[0].Op == OpAMD64TESTB { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETAE { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETAE || cmp != v_0_1.Args[0] { + break + } + b.resetWithControl(BlockAMD64UGE, cmp) + return true + } + // match: (NE (TESTB (SETO cmp) (SETO cmp)) yes no) + // result: (OS cmp yes no) + for b.Controls[0].Op == OpAMD64TESTB { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETO { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETO || cmp != v_0_1.Args[0] { + break + } + b.resetWithControl(BlockAMD64OS, cmp) + return true + } + // match: (NE (TESTL (SHLL (MOVLconst [1]) x) y)) + // result: (ULT (BTL x y)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64SHLL { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } + // match: (NE (TESTQ (SHLQ (MOVQconst [1]) x) y)) + // result: (ULT (BTQ x y)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, 
v_0_0 { + if v_0_0.Op != OpAMD64SHLQ { + continue + } + x := v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + continue + } + y := v_0_1 + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags) + v0.AddArg2(x, y) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } + // match: (NE (TESTLconst [c] x)) + // cond: isPowerOfTwo(uint32(c)) + // result: (ULT (BTLconst [int8(log32u(uint32(c)))] x)) + for b.Controls[0].Op == OpAMD64TESTLconst { + v_0 := b.Controls[0] + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint32(c))) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + // match: (NE (TESTQconst [c] x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (ULT (BTQconst [int8(log32u(uint32(c)))] x)) + for b.Controls[0].Op == OpAMD64TESTQconst { + v_0 := b.Controls[0] + c := auxIntToInt32(v_0.AuxInt) + x := v_0.Args[0] + if !(isPowerOfTwo(uint64(c))) { + break + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + // match: (NE (TESTQ (MOVQconst [c]) x)) + // cond: isPowerOfTwo(uint64(c)) + // result: (ULT (BTQconst [int8(log64u(uint64(c)))] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpAMD64MOVQconst { + continue + } + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(isPowerOfTwo(uint64(c))) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + 
} + // match: (NE (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) + // cond: z1==z2 + // result: (ULT (BTQconst [63] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } + // match: (NE (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) + // cond: z1==z2 + // result: (ULT (BTQconst [31] x)) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } + // match: (NE (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) + // cond: z1==z2 + // result: (ULT (BTQconst [0] x)) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLQconst 
|| auxIntToInt8(z1_0.AuxInt) != 63 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break + } + // match: (NE (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) + // cond: z1==z2 + // result: (ULT (BTLconst [0] x)) + for b.Controls[0].Op == OpAMD64TESTL { v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64UGT, cmp) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + z1_0 := z1.Args[0] + if z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + continue + } + x := z1_0.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(0) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break } - // match: (If (SETAE cmp) yes no) - // result: (UGE cmp yes no) - for b.Controls[0].Op == OpAMD64SETAE { + // match: (NE (TESTQ z1:(SHRQconst [63] x) z2)) + // cond: z1==z2 + // result: (ULT (BTQconst [63] x)) + for b.Controls[0].Op == OpAMD64TESTQ { v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64UGE, cmp) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + continue + } + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(63) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break } - // match: (If (SETO cmp) yes no) - // 
result: (OS cmp yes no) - for b.Controls[0].Op == OpAMD64SETO { + // match: (NE (TESTL z1:(SHRLconst [31] x) z2)) + // cond: z1==z2 + // result: (ULT (BTLconst [31] x)) + for b.Controls[0].Op == OpAMD64TESTL { v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64OS, cmp) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + z1 := v_0_0 + if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + continue + } + x := z1.Args[0] + z2 := v_0_1 + if !(z1 == z2) { + continue + } + v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) + v0.AuxInt = int8ToAuxInt(31) + v0.AddArg(x) + b.resetWithControl(BlockAMD64ULT, v0) + return true + } + break } - // match: (If (SETGF cmp) yes no) + // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) // result: (UGT cmp yes no) - for b.Controls[0].Op == OpAMD64SETGF { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETGF { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETGF || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64UGT, cmp) return true } - // match: (If (SETGEF cmp) yes no) + // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) // result: (UGE cmp yes no) - for b.Controls[0].Op == OpAMD64SETGEF { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETGEF { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETGEF || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64UGE, cmp) return true } - // match: (If (SETEQF cmp) yes no) + // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) // result: (EQF cmp yes no) - for b.Controls[0].Op == OpAMD64SETEQF { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := 
b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETEQF { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETEQF || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64EQF, cmp) return true } - // match: (If (SETNEF cmp) yes no) + // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) // result: (NEF cmp yes no) - for b.Controls[0].Op == OpAMD64SETNEF { + for b.Controls[0].Op == OpAMD64TESTB { v_0 := b.Controls[0] - cmp := v_0.Args[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64SETNEF { + break + } + cmp := v_0_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpAMD64SETNEF || cmp != v_0_1.Args[0] { + break + } b.resetWithControl(BlockAMD64NEF, cmp) return true } - // match: (If cond yes no) - // result: (NE (TESTB cond cond) yes no) - for { - cond := b.Controls[0] - v0 := b.NewValue0(cond.Pos, OpAMD64TESTB, types.TypeFlags) - v0.AddArg2(cond, cond) - b.resetWithControl(BlockAMD64NE, v0) - return true - } - case BlockJumpTable: - // match: (JumpTable idx) - // result: (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ {makeJumpTableSym(b)} (SB))) - for { - idx := b.Controls[0] - v0 := b.NewValue0(b.Pos, OpAMD64LEAQ, typ.Uintptr) - v0.Aux = symToAux(makeJumpTableSym(b)) - v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr) - v0.AddArg(v1) - b.resetWithControl2(BlockAMD64JUMPTABLE, idx, v0) - b.Aux = symToAux(makeJumpTableSym(b)) - return true - } - case BlockAMD64LE: - // match: (LE (InvertFlags cmp) yes no) - // result: (GE cmp yes no) + // match: (NE (InvertFlags cmp) yes no) + // result: (NE cmp yes no) for b.Controls[0].Op == OpAMD64InvertFlags { v_0 := b.Controls[0] cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64GE, cmp) + b.resetWithControl(BlockAMD64NE, cmp) return true } - // match: (LE (FlagEQ) yes no) - // result: (First yes no) + // match: (NE (FlagEQ) yes no) + // result: (First no yes) for b.Controls[0].Op == OpAMD64FlagEQ { b.Reset(BlockFirst) + 
b.swapSuccessors() return true } - // match: (LE (FlagLT_ULT) yes no) + // match: (NE (FlagLT_ULT) yes no) // result: (First yes no) for b.Controls[0].Op == OpAMD64FlagLT_ULT { b.Reset(BlockFirst) return true } - // match: (LE (FlagLT_UGT) yes no) + // match: (NE (FlagLT_UGT) yes no) // result: (First yes no) for b.Controls[0].Op == OpAMD64FlagLT_UGT { b.Reset(BlockFirst) return true } - // match: (LE (FlagGT_ULT) yes no) - // result: (First no yes) + // match: (NE (FlagGT_ULT) yes no) + // result: (First yes no) for b.Controls[0].Op == OpAMD64FlagGT_ULT { b.Reset(BlockFirst) - b.swapSuccessors() return true } - // match: (LE (FlagGT_UGT) yes no) - // result: (First no yes) + // match: (NE (FlagGT_UGT) yes no) + // result: (First yes no) for b.Controls[0].Op == OpAMD64FlagGT_UGT { b.Reset(BlockFirst) - b.swapSuccessors() return true } - case BlockAMD64LT: - // match: (LT c:(CMPQconst [128] z) yes no) - // cond: c.Uses == 1 - // result: (LE (CMPQconst [127] z) yes no) - for b.Controls[0].Op == OpAMD64CMPQconst { - c := b.Controls[0] - if auxIntToInt32(c.AuxInt) != 128 { - break + // match: (NE (TESTQ s:(Select0 blsr:(BLSRQ _)) s) yes no) + // result: (NE (Select1 blsr) yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { + continue + } + v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + b.resetWithControl(BlockAMD64NE, v0) + return true } - z := c.Args[0] - if !(c.Uses == 1) { - break + break + } + // match: (NE (TESTL s:(Select0 blsr:(BLSRL _)) s) yes no) + // result: (NE (Select1 blsr) yes no) + for b.Controls[0].Op == OpAMD64TESTL { + v_0 := b.Controls[0] + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, 
v_0_1, v_0_0 { + s := v_0_0 + if s.Op != OpSelect0 { + continue + } + blsr := s.Args[0] + if blsr.Op != OpAMD64BLSRL || s != v_0_1 { + continue + } + v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(blsr) + b.resetWithControl(BlockAMD64NE, v0) + return true } - v0 := b.NewValue0(c.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(z) - b.resetWithControl(BlockAMD64LE, v0) - return true + break } - // match: (LT c:(CMPLconst [128] z) yes no) - // cond: c.Uses == 1 - // result: (LE (CMPLconst [127] z) yes no) - for b.Controls[0].Op == OpAMD64CMPLconst { - c := b.Controls[0] - if auxIntToInt32(c.AuxInt) != 128 { - break + // match: (NE t:(TESTQ a:(ADDQconst [c] x) a)) + // cond: t.Uses == 1 && flagify(a) + // result: (NE (Select1 a.Args[0])) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + a := t_0 + if a.Op != OpAMD64ADDQconst { + continue + } + if a != t_1 || !(t.Uses == 1 && flagify(a)) { + continue + } + v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(a.Args[0]) + b.resetWithControl(BlockAMD64NE, v0) + return true } - z := c.Args[0] - if !(c.Uses == 1) { - break + break + } + // match: (NE t:(TESTL a:(ADDLconst [c] x) a)) + // cond: t.Uses == 1 && flagify(a) + // result: (NE (Select1 a.Args[0])) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + a := t_0 + if a.Op != OpAMD64ADDLconst { + continue + } + if a != t_1 || !(t.Uses == 1 && flagify(a)) { + continue + } + v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) + v0.AddArg(a.Args[0]) + b.resetWithControl(BlockAMD64NE, v0) + return true } - v0 := b.NewValue0(c.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(127) - v0.AddArg(z) - b.resetWithControl(BlockAMD64LE, v0) - 
return true + break } - // match: (LT (InvertFlags cmp) yes no) - // result: (GT cmp yes no) - for b.Controls[0].Op == OpAMD64InvertFlags { - v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64GT, cmp) - return true + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETEQ flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQ, flags) + return true + } + break } - // match: (LT (FlagEQ) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagEQ { - b.Reset(BlockFirst) - b.swapSuccessors() - return true + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETNE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NE, flags) + return true + } + break } - // match: (LT (FlagLT_ULT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagLT_ULT { - b.Reset(BlockFirst) - return true + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETL flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != 
OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LT, flags) + return true + } + break } - // match: (LT (FlagLT_UGT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagLT_UGT { - b.Reset(BlockFirst) - return true + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETG flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true + } + break } - // match: (LT (FlagGT_ULT) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagGT_ULT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETLE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LE, flags) + return true + } + break } - // match: (LT (FlagGT_UGT) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagGT_UGT { - b.Reset(BlockFirst) - b.swapSuccessors() - return true + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETGE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == 
OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GE, flags) + return true + } + break + } + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETA flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true + } + break + } + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETB flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) + return true + } + break + } + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETAE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 
|| !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true + } + break + } + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETBE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) + return true + } + break } - case BlockAMD64NE: - // match: (NE (TESTB (SETL cmp) (SETL cmp)) yes no) - // result: (LT cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETL { - break + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETEQF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQF flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQF, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETL || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETNEF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NEF flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != 
OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NEF, flags) + return true } - b.resetWithControl(BlockAMD64LT, cmp) - return true + break } - // match: (NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) - // result: (LE cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETLE { - break + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETGF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETLE || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTQ x:(MOVBQZX s:(SETGEF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTQ { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true } - b.resetWithControl(BlockAMD64LE, cmp) - return true + break } - // match: (NE (TESTB (SETG cmp) (SETG cmp)) yes no) - // result: (GT cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETG { - break + // match: (NE 
t:(TESTL x:(MOVBQZX s:(SETEQ flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQ, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETG || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETNE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NE, flags) + return true } - b.resetWithControl(BlockAMD64GT, cmp) - return true + break } - // match: (NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) - // result: (GE cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETGE { - break + // match: (NE t:(TESTL x:(MOVBQZX s:(SETL flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + 
b.resetWithControl(BlockAMD64LT, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETGE || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETG flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true } - b.resetWithControl(BlockAMD64GE, cmp) - return true + break } - // match: (NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) - // result: (EQ cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETEQ { - break + // match: (NE t:(TESTL x:(MOVBQZX s:(SETLE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LE, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETEQ || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETGE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + 
x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GE, flags) + return true } - b.resetWithControl(BlockAMD64EQ, cmp) - return true + break } - // match: (NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) - // result: (NE cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETNE { - break + // match: (NE t:(TESTL x:(MOVBQZX s:(SETA flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETNE || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETB flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) + return true } - b.resetWithControl(BlockAMD64NE, cmp) - return true + break } - // match: (NE (TESTB (SETB cmp) (SETB cmp)) yes no) - // result: (ULT cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] 
- v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETB { - break + // match: (NE t:(TESTL x:(MOVBQZX s:(SETAE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETB || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETBE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) + return true } - b.resetWithControl(BlockAMD64ULT, cmp) - return true + break } - // match: (NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) - // result: (ULE cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETBE { - break + // match: (NE t:(TESTL x:(MOVBQZX s:(SETEQF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQF flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQF { + 
continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQF, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETBE || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETNEF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NEF flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NEF, flags) + return true } - b.resetWithControl(BlockAMD64ULE, cmp) - return true + break } - // match: (NE (TESTB (SETA cmp) (SETA cmp)) yes no) - // result: (UGT cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETA { - break + // match: (NE t:(TESTL x:(MOVBQZX s:(SETGF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETA || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTL x:(MOVBQZX s:(SETGEF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTL { + t := b.Controls[0] + _ = t.Args[1] 
+ t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGEF { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true } - b.resetWithControl(BlockAMD64UGT, cmp) - return true + break } - // match: (NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) - // result: (UGE cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETAE { - break + // match: (NE t:(TESTW x:(MOVBQZX s:(SETEQ flags)) x) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETEQ { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64EQ, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETAE || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTW x:(MOVBQZX s:(SETNE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETNE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64NE, flags) + return true } - b.resetWithControl(BlockAMD64UGE, cmp) - return true + break } - // match: (NE (TESTB (SETO cmp) (SETO cmp)) yes no) - // 
result: (OS cmp yes no) - for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETO { - break + // match: (NE t:(TESTW x:(MOVBQZX s:(SETL flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LT, flags) + return true } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETO || cmp != v_0_1.Args[0] { - break + break + } + // match: (NE t:(TESTW x:(MOVBQZX s:(SETG flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true } - b.resetWithControl(BlockAMD64OS, cmp) - return true + break } - // match: (NE (TESTL (SHLL (MOVLconst [1]) x) y)) - // result: (ULT (BTL x y)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLL { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETLE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := 
t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0_0.AuxInt) != 1 { + s := x.Args[0] + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LE, flags) + return true + } + break + } + // match: (NE t:(TESTW x:(MOVBQZX s:(SETGE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - y := v_0_1 - v0 := b.NewValue0(v_0.Pos, OpAMD64BTL, types.TypeFlags) - v0.AddArg2(x, y) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64GE, flags) return true } break } - // match: (NE (TESTQ (SHLQ (MOVQconst [1]) x) y)) - // result: (ULT (BTQ x y)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64SHLQ { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETA flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - x := v_0_0.Args[1] - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0_0.AuxInt) != 1 { + s := x.Args[0] + if s.Op != OpAMD64SETA { continue } - y := v_0_1 - v0 := 
b.NewValue0(v_0.Pos, OpAMD64BTQ, types.TypeFlags) - v0.AddArg2(x, y) - b.resetWithControl(BlockAMD64ULT, v0) + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) return true } break } - // match: (NE (TESTLconst [c] x)) - // cond: isPowerOfTwo(uint32(c)) - // result: (ULT (BTLconst [int8(log32u(uint32(c)))] x)) - for b.Controls[0].Op == OpAMD64TESTLconst { - v_0 := b.Controls[0] - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(isPowerOfTwo(uint32(c))) { - break + // match: (NE t:(TESTW x:(MOVBQZX s:(SETB flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) + return true } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) - return true + break } - // match: (NE (TESTQconst [c] x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (ULT (BTQconst [int8(log32u(uint32(c)))] x)) - for b.Controls[0].Op == OpAMD64TESTQconst { - v_0 := b.Controls[0] - c := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - if !(isPowerOfTwo(uint64(c))) { - break + // match: (NE t:(TESTW x:(MOVBQZX s:(SETAE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { + continue + } + s := x.Args[0] + if s.Op != OpAMD64SETAE { + continue + } 
+ flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c)))) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) - return true + break } - // match: (NE (TESTQ (MOVQconst [c]) x)) - // cond: isPowerOfTwo(uint64(c)) - // result: (ULT (BTQconst [int8(log64u(uint64(c)))] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - if v_0_0.Op != OpAMD64MOVQconst { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETBE flags)) x) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - c := auxIntToInt64(v_0_0.AuxInt) - x := v_0_1 - if !(isPowerOfTwo(uint64(c))) { + s := x.Args[0] + if s.Op != OpAMD64SETBE { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c)))) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) return true } break } - // match: (NE (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) - // cond: z1==z2 - // result: (ULT (BTQconst [63] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLQconst || auxIntToInt8(z1.AuxInt) != 63 { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETEQF flags)) x) yes no) + // cond: t.Block == s.Block + // 
result: (EQF flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + s := x.Args[0] + if s.Op != OpAMD64SETEQF { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64EQF, flags) return true } break } - // match: (NE (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) - // cond: z1==z2 - // result: (ULT (BTQconst [31] x)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHLLconst || auxIntToInt8(z1.AuxInt) != 31 { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETNEF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (NEF flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHRQconst || auxIntToInt8(z1_0.AuxInt) != 31 { + s := x.Args[0] + if s.Op != OpAMD64SETNEF { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64NEF, flags) return true } break } - // match: (NE 
(TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) - // cond: z1==z2 - // result: (ULT (BTQconst [0] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETGF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if z1_0.Op != OpAMD64SHLQconst || auxIntToInt8(z1_0.AuxInt) != 63 { + s := x.Args[0] + if s.Op != OpAMD64SETGF { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64UGT, flags) return true } break } - // match: (NE (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) - // cond: z1==z2 - // result: (ULT (BTLconst [0] x)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + // match: (NE t:(TESTW x:(MOVBQZX s:(SETGEF flags)) x) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTW { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + x := t_0 + if x.Op != OpAMD64MOVBQZX { continue } - z1_0 := z1.Args[0] - if 
z1_0.Op != OpAMD64SHLLconst || auxIntToInt8(z1_0.AuxInt) != 31 { + s := x.Args[0] + if s.Op != OpAMD64SETGEF { continue } - x := z1_0.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if x != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64UGE, flags) return true } break } - // match: (NE (TESTQ z1:(SHRQconst [63] x) z2)) - // cond: z1==z2 - // result: (ULT (BTQconst [63] x)) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRQconst || auxIntToInt8(z1.AuxInt) != 63 { + // match: (NE t:(TESTB s:(SETEQ flags) s) yes no) + // cond: t.Block == s.Block + // result: (EQ flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQ { continue } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(63) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64EQ, flags) return true } break } - // match: (NE (TESTL z1:(SHRLconst [31] x) z2)) - // cond: z1==z2 - // result: (ULT (BTLconst [31] x)) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - z1 := v_0_0 - if z1.Op != OpAMD64SHRLconst || auxIntToInt8(z1.AuxInt) != 31 { + // match: (NE t:(TESTB s:(SETNE flags) s) yes no) + // cond: t.Block == s.Block + // result: (NE 
flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNE { continue } - x := z1.Args[0] - z2 := v_0_1 - if !(z1 == z2) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags) - v0.AuxInt = int8ToAuxInt(31) - v0.AddArg(x) - b.resetWithControl(BlockAMD64ULT, v0) + b.resetWithControl(BlockAMD64NE, flags) return true } break } - // match: (NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) - // result: (UGT cmp yes no) + // match: (NE t:(TESTB s:(SETL flags) s) yes no) + // cond: t.Block == s.Block + // result: (LT flags yes no) for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETGF { - break - } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETGF || cmp != v_0_1.Args[0] { - break + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETL { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LT, flags) + return true } - b.resetWithControl(BlockAMD64UGT, cmp) - return true + break } - // match: (NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) - // result: (UGE cmp yes no) + // match: (NE t:(TESTB s:(SETG flags) s) yes no) + // cond: t.Block == s.Block + // result: (GT flags yes no) for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETGEF { - break - } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETGEF || cmp != v_0_1.Args[0] { - break + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = 
_i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETG { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GT, flags) + return true } - b.resetWithControl(BlockAMD64UGE, cmp) - return true + break } - // match: (NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) - // result: (EQF cmp yes no) + // match: (NE t:(TESTB s:(SETLE flags) s) yes no) + // cond: t.Block == s.Block + // result: (LE flags yes no) for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETEQF { - break - } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETEQF || cmp != v_0_1.Args[0] { - break + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETLE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64LE, flags) + return true } - b.resetWithControl(BlockAMD64EQF, cmp) - return true + break } - // match: (NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) - // result: (NEF cmp yes no) + // match: (NE t:(TESTB s:(SETGE flags) s) yes no) + // cond: t.Block == s.Block + // result: (GE flags yes no) for b.Controls[0].Op == OpAMD64TESTB { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64SETNEF { - break - } - cmp := v_0_0.Args[0] - v_0_1 := v_0.Args[1] - if v_0_1.Op != OpAMD64SETNEF || cmp != v_0_1.Args[0] { - break + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETGE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64GE, flags) + return true } - b.resetWithControl(BlockAMD64NEF, cmp) - return true - } - // match: (NE (InvertFlags cmp) 
yes no) - // result: (NE cmp yes no) - for b.Controls[0].Op == OpAMD64InvertFlags { - v_0 := b.Controls[0] - cmp := v_0.Args[0] - b.resetWithControl(BlockAMD64NE, cmp) - return true - } - // match: (NE (FlagEQ) yes no) - // result: (First no yes) - for b.Controls[0].Op == OpAMD64FlagEQ { - b.Reset(BlockFirst) - b.swapSuccessors() - return true + break } - // match: (NE (FlagLT_ULT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagLT_ULT { - b.Reset(BlockFirst) - return true + // match: (NE t:(TESTB s:(SETA flags) s) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETA { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGT, flags) + return true + } + break } - // match: (NE (FlagLT_UGT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagLT_UGT { - b.Reset(BlockFirst) - return true + // match: (NE t:(TESTB s:(SETB flags) s) yes no) + // cond: t.Block == s.Block + // result: (ULT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETB { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULT, flags) + return true + } + break } - // match: (NE (FlagGT_ULT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagGT_ULT { - b.Reset(BlockFirst) - return true + // match: (NE t:(TESTB s:(SETAE flags) s) yes no) + // cond: t.Block == s.Block + // result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := 
t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETAE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64UGE, flags) + return true + } + break } - // match: (NE (FlagGT_UGT) yes no) - // result: (First yes no) - for b.Controls[0].Op == OpAMD64FlagGT_UGT { - b.Reset(BlockFirst) - return true + // match: (NE t:(TESTB s:(SETBE flags) s) yes no) + // cond: t.Block == s.Block + // result: (ULE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETBE { + continue + } + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { + continue + } + b.resetWithControl(BlockAMD64ULE, flags) + return true + } + break } - // match: (NE (TESTQ s:(Select0 blsr:(BLSRQ _)) s) yes no) - // result: (NE (Select1 blsr) yes no) - for b.Controls[0].Op == OpAMD64TESTQ { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { + // match: (NE t:(TESTB s:(SETEQF flags) s) yes no) + // cond: t.Block == s.Block + // result: (EQF flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETEQF { continue } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRQ || s != v_0_1 { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - b.resetWithControl(BlockAMD64NE, v0) + b.resetWithControl(BlockAMD64EQF, flags) return true } break } - // match: (NE (TESTL s:(Select0 blsr:(BLSRL _)) s) yes no) - // result: (NE (Select1 blsr) 
yes no) - for b.Controls[0].Op == OpAMD64TESTL { - v_0 := b.Controls[0] - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - s := v_0_0 - if s.Op != OpSelect0 { + // match: (NE t:(TESTB s:(SETNEF flags) s) yes no) + // cond: t.Block == s.Block + // result: (NEF flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { + t := b.Controls[0] + _ = t.Args[1] + t_0 := t.Args[0] + t_1 := t.Args[1] + for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { + s := t_0 + if s.Op != OpAMD64SETNEF { continue } - blsr := s.Args[0] - if blsr.Op != OpAMD64BLSRL || s != v_0_1 { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(v_0.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(blsr) - b.resetWithControl(BlockAMD64NE, v0) + b.resetWithControl(BlockAMD64NEF, flags) return true } break } - // match: (NE t:(TESTQ a:(ADDQconst [c] x) a)) - // cond: t.Uses == 1 && flagify(a) - // result: (NE (Select1 a.Args[0])) - for b.Controls[0].Op == OpAMD64TESTQ { + // match: (NE t:(TESTB s:(SETGF flags) s) yes no) + // cond: t.Block == s.Block + // result: (UGT flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { t := b.Controls[0] _ = t.Args[1] t_0 := t.Args[0] t_1 := t.Args[1] for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { - a := t_0 - if a.Op != OpAMD64ADDQconst { + s := t_0 + if s.Op != OpAMD64SETGF { continue } - if a != t_1 || !(t.Uses == 1 && flagify(a)) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(a.Args[0]) - b.resetWithControl(BlockAMD64NE, v0) + b.resetWithControl(BlockAMD64UGT, flags) return true } break } - // match: (NE t:(TESTL a:(ADDLconst [c] x) a)) - // cond: t.Uses == 1 && flagify(a) - // result: (NE (Select1 a.Args[0])) - for b.Controls[0].Op == OpAMD64TESTL { + // match: (NE t:(TESTB s:(SETGEF flags) s) yes no) + // cond: t.Block == s.Block + // 
result: (UGE flags yes no) + for b.Controls[0].Op == OpAMD64TESTB { t := b.Controls[0] _ = t.Args[1] t_0 := t.Args[0] t_1 := t.Args[1] for _i0 := 0; _i0 <= 1; _i0, t_0, t_1 = _i0+1, t_1, t_0 { - a := t_0 - if a.Op != OpAMD64ADDLconst { + s := t_0 + if s.Op != OpAMD64SETGEF { continue } - if a != t_1 || !(t.Uses == 1 && flagify(a)) { + flags := s.Args[0] + if s != t_1 || !(t.Block == s.Block) { continue } - v0 := b.NewValue0(t.Pos, OpSelect1, types.TypeFlags) - v0.AddArg(a.Args[0]) - b.resetWithControl(BlockAMD64NE, v0) + b.resetWithControl(BlockAMD64UGE, flags) return true } break diff --git a/src/database/sql/fakedb_test.go b/src/database/sql/fakedb_test.go index 9a6c93c26583ea..23715e221f2149 100644 --- a/src/database/sql/fakedb_test.go +++ b/src/database/sql/fakedb_test.go @@ -62,8 +62,21 @@ type fakeConnector struct { func (c *fakeConnector) Connect(context.Context) (driver.Conn, error) { conn, err := fdriver.Open(c.name) + if err != nil { + return nil, err + } conn.(*fakeConn).waiter = c.waiter - return conn, err + return conn, nil +} + +func getFakeConn(c driver.Conn) *fakeConn { + return c.(interface { + getFakeConn() *fakeConn + }).getFakeConn() +} + +func (c *fakeConn) getFakeConn() *fakeConn { + return c } func (c *fakeConnector) Driver() driver.Driver { diff --git a/src/database/sql/sql_test.go b/src/database/sql/sql_test.go index e77b5e3c059808..ada9f62927297e 100644 --- a/src/database/sql/sql_test.go +++ b/src/database/sql/sql_test.go @@ -25,6 +25,72 @@ import ( "uuid" ) +type requireFeature string + +// testDatabase executes f in a synctest bubble. +// +// It executes several subtests, each with a database driver supporting +// a different set of optional interfaces (QueryerContext, etc.). +// +// Limit a test to drivers implementing a certain feature by passing +// a requireFeature option. For example: +// +// // testFunc only executes with drivers which implement Validator.
+// testDatabase(t, testFunc, requireFeature("Validator")) +func testDatabase(t *testing.T, f func(t *testing.T, db *DB), opts ...any) { + var require []string + for _, o := range opts { + switch o := o.(type) { + case requireFeature: + require = append(require, string(o)) + default: + t.Fatalf("unrecognized option %T", o) + } + } +Test: + for _, test := range []struct { + name string + connector driver.Connector + features []string + }{ + { + // Basic driver supporting none of the optional driver interfaces. + name: "basic", + connector: &basicConnector{name: fakeDBName}, + }, + { + // Default test driver. Supports some but not all features. + // This is the "default" because this is the only driver we used + // before adding testDatabase. + name: "default", + connector: &fakeConnector{name: fakeDBName}, + features: []string{ + "ConnBeginTx", + "NamedValue", + "Validator", + }, + }, + } { + for _, req := range require { + if !slices.Contains(test.features, req) { + continue Test + } + } + t.Run(test.name, func(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + db := OpenDB(test.connector) + if _, err := db.Exec("WIPE"); err != nil { + t.Fatalf("exec wipe: %v", err) + } + t.Cleanup(func() { + closeDB(t, db) + }) + f(t, db) + }) + }) + } +} + func init() { type dbConn struct { db *DB @@ -71,23 +137,106 @@ func newTestDBConnector(t testing.TB, fc *fakeConnector, name string) *DB { t.Cleanup(func() { closeDB(t, db) }) - if name == "people" { + if name != "" { + populate(t, db, name) + } + return db +} + +func populate(t testing.TB, db *DB, name string) { + t.Helper() + switch name { + case "people": exec(t, db, "CREATE|people|name=string,age=int32,photo=blob,dead=bool,bdate=datetime") exec(t, db, "INSERT|people|name=Alice,age=?,photo=APHOTO", 1) exec(t, db, "INSERT|people|name=Bob,age=?,photo=BPHOTO", 2) exec(t, db, "INSERT|people|name=Chris,age=?,photo=CPHOTO,bdate=?", 3, chrisBirthday) - } - if name == "magicquery" { + case "magicquery": // Magic table name 
and column, known by fakedb_test.go. exec(t, db, "CREATE|magicquery|op=string,millis=int32") exec(t, db, "INSERT|magicquery|op=sleep,millis=10") - } - if name == "tx_status" { + case "tx_status": // Magic table name and column, known by fakedb_test.go. exec(t, db, "CREATE|tx_status|tx_status=string") exec(t, db, "INSERT|tx_status|tx_status=invalid") + default: + t.Fatalf("unknown database name %q", name) } - return db +} + +// basicConn implements only the bare minimum of the driver.Conn interface. +type basicConn struct { + driver.Conn +} + +func (c *basicConn) getFakeConn() *fakeConn { + return c.Conn.(*fakeConn) +} + +func (c *basicConn) Prepare(query string) (driver.Stmt, error) { + stmt, err := c.Conn.(*fakeConn).PrepareContext(context.Background(), query) + if err != nil { + return nil, err + } + return &basicStmt{fc: c.Conn.(*fakeConn), Stmt: stmt}, nil +} + +func (c *basicConn) Begin() (driver.Tx, error) { + return c.Conn.(*fakeConn).Begin() +} + +func (c *basicConn) Close() error { + return c.Conn.(*fakeConn).Close() +} + +type basicStmt struct { + fc *fakeConn + driver.Stmt +} + +func (s *basicStmt) valuesToNamedValues(args []driver.Value) ([]driver.NamedValue, error) { + nv := make([]driver.NamedValue, len(args)) + for i, arg := range args { + val, err := s.Stmt.(*fakeStmt).ColumnConverter(i).ConvertValue(arg) + if err != nil { + return nil, fmt.Errorf("sql: converting argument $%v type: %w", i+1, err) + } + nv[i] = driver.NamedValue{ + Ordinal: i + 1, + Value: val, + } + } + return nv, nil +} + +func (s *basicStmt) Exec(args []driver.Value) (driver.Result, error) { + nvs, err := s.valuesToNamedValues(args) + if err != nil { + return nil, err + } + return s.Stmt.(*fakeStmt).ExecContext(context.Background(), nvs) +} + +func (s *basicStmt) Query(args []driver.Value) (driver.Rows, error) { + nvs, err := s.valuesToNamedValues(args) + if err != nil { + return nil, err + } + return s.Stmt.(*fakeStmt).QueryContext(context.Background(), nvs) +} + +type 
basicConnector struct { + fakeConnector +} + +func (c *basicConnector) Connect(ctx context.Context) (driver.Conn, error) { + conn, err := c.fakeConnector.Connect(ctx) + if err != nil { + return nil, err + } + fc := getFakeConn(conn) + fc.skipDirtySession = true // Conn won't implement ResetSession + return &basicConn{fc}, nil } func TestOpenDB(t *testing.T) { @@ -210,7 +359,7 @@ func numPrepares(t *testing.T, db *DB) int { if n := len(db.freeConn); n != 1 { t.Fatalf("free conns = %d; want 1", n) } - return db.freeConn[0].ci.(*fakeConn).numPrepare + return getFakeConn(db.freeConn[0].ci).numPrepare } func (db *DB) numDeps() int { @@ -262,10 +411,10 @@ func (db *DB) dumpDep(t *testing.T, depth int, dep finalCloser, seen map[finalCl } func TestQuery(t *testing.T) { - synctest.Test(t, testQuery) + testDatabase(t, testQuery) } -func testQuery(t *testing.T) { - db := newTestDB(t, "people") +func testQuery(t *testing.T, db *DB) { + populate(t, db, "people") prepares0 := numPrepares(t, db) rows, err := db.Query("SELECT|people|age,name|") if err != nil { @@ -310,10 +459,10 @@ func testQuery(t *testing.T) { // TestQueryContext tests canceling the context while scanning the rows. func TestQueryContext(t *testing.T) { - synctest.Test(t, testQueryContext) + testDatabase(t, testQueryContext) } -func testQueryContext(t *testing.T) { - db := newTestDB(t, "people") +func testQueryContext(t *testing.T, db *DB) { + populate(t, db, "people") prepares0 := numPrepares(t, db) ctx, cancel := context.WithCancel(context.Background()) @@ -395,10 +544,10 @@ func waitForRowsClose(t *testing.T, rows *Rows) { // TestQueryContextWait ensures that rows and all internal statements are closed when // a query context is closed during execution. 
func TestQueryContextWait(t *testing.T) { - synctest.Test(t, testQueryContextWait) + testDatabase(t, testQueryContextWait) } -func testQueryContextWait(t *testing.T) { - db := newTestDB(t, "people") +func testQueryContextWait(t *testing.T, db *DB) { + populate(t, db, "people") prepares0 := numPrepares(t, db) ctx, cancel := context.WithCancel(context.Background()) @@ -412,7 +561,7 @@ func testQueryContextWait(t *testing.T) { t.Fatal(err) } - c.dc.ci.(*fakeConn).waiter = func(c context.Context) { + getFakeConn(c.dc.ci).waiter = func(c context.Context) { cancel() <-ctx.Done() } @@ -432,21 +581,21 @@ func testQueryContextWait(t *testing.T) { // TestTxContextWait tests the transaction behavior when the tx context is canceled // during execution of the query. func TestTxContextWait(t *testing.T) { - synctest.Test(t, func(t *testing.T) { - testContextWait(t, false) + testDatabase(t, func(t *testing.T, db *DB) { + testContextWait(t, false, db) }) } // TestTxContextWaitNoDiscard is the same as TestTxContextWait, but should not discard // the final connection. func TestTxContextWaitNoDiscard(t *testing.T) { - synctest.Test(t, func(t *testing.T) { - testContextWait(t, true) + testDatabase(t, func(t *testing.T, db *DB) { + testContextWait(t, true, db) }) } -func testContextWait(t *testing.T, keepConnOnRollback bool) { - db := newTestDB(t, "people") +func testContextWait(t *testing.T, keepConnOnRollback bool, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithCancel(context.Background()) @@ -456,7 +605,7 @@ func testContextWait(t *testing.T, keepConnOnRollback bool) { } tx.keepConnOnRollback = keepConnOnRollback - tx.dc.ci.(*fakeConn).waiter = func(c context.Context) { + getFakeConn(tx.dc.ci).waiter = func(c context.Context) { cancel() <-ctx.Done() } @@ -479,10 +628,10 @@ func testContextWait(t *testing.T, keepConnOnRollback bool) { // doesn't implement ConnBeginTx is used with non-default options and an // un-cancellable context. 
func TestUnsupportedOptions(t *testing.T) { - synctest.Test(t, testUnsupportedOptions) + testDatabase(t, testUnsupportedOptions) } -func testUnsupportedOptions(t *testing.T) { - db := newTestDB(t, "people") +func testUnsupportedOptions(t *testing.T, db *DB) { + populate(t, db, "people") _, err := db.BeginTx(context.Background(), &TxOptions{ Isolation: LevelSerializable, ReadOnly: true, }) @@ -492,10 +641,10 @@ func testUnsupportedOptions(t *testing.T) { } func TestMultiResultSetQuery(t *testing.T) { - synctest.Test(t, testMultiResultSetQuery) + testDatabase(t, testMultiResultSetQuery) } -func testMultiResultSetQuery(t *testing.T) { - db := newTestDB(t, "people") +func testMultiResultSetQuery(t *testing.T, db *DB) { + populate(t, db, "people") prepares0 := numPrepares(t, db) rows, err := db.Query("SELECT|people|age,name|;SELECT|people|name|") if err != nil { @@ -568,10 +717,10 @@ func testMultiResultSetQuery(t *testing.T) { } func TestQueryNamedArg(t *testing.T) { - synctest.Test(t, testQueryNamedArg) + testDatabase(t, testQueryNamedArg, requireFeature("NamedValue")) } -func testQueryNamedArg(t *testing.T) { - db := newTestDB(t, "people") +func testQueryNamedArg(t *testing.T, db *DB) { + populate(t, db, "people") prepares0 := numPrepares(t, db) rows, err := db.Query( // Ensure the name and age parameters only match on placeholder name, not position. 
@@ -699,10 +848,10 @@ func testPoolExhaustOnCancel(t *testing.T) { } func TestRowsColumns(t *testing.T) { - synctest.Test(t, testRowsColumns) + testDatabase(t, testRowsColumns) } -func testRowsColumns(t *testing.T) { - db := newTestDB(t, "people") +func testRowsColumns(t *testing.T, db *DB) { + populate(t, db, "people") rows, err := db.Query("SELECT|people|age,name|") if err != nil { t.Fatalf("Query: %v", err) @@ -721,11 +870,10 @@ func testRowsColumns(t *testing.T) { } func TestRowsColumnTypes(t *testing.T) { - synctest.Test(t, testRowsColumnTypes) + testDatabase(t, testRowsColumnTypes) } -func testRowsColumnTypes(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testRowsColumnTypes(t *testing.T, db *DB) { + populate(t, db, "people") rows, err := db.Query("SELECT|people|age,name|") if err != nil { t.Fatalf("Query: %v", err) @@ -774,11 +922,10 @@ func testRowsColumnTypes(t *testing.T) { } func TestQueryRow(t *testing.T) { - synctest.Test(t, testQueryRow) + testDatabase(t, testQueryRow) } -func testQueryRow(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testQueryRow(t *testing.T, db *DB) { + populate(t, db, "people") var name string var age int var birthday time.Time @@ -827,10 +974,10 @@ func testQueryRow(t *testing.T) { } func TestRowErr(t *testing.T) { - synctest.Test(t, testRowErr) + testDatabase(t, testRowErr) } -func testRowErr(t *testing.T) { - db := newTestDB(t, "people") +func testRowErr(t *testing.T, db *DB) { + populate(t, db, "people") row := db.QueryRowContext(context.Background(), "SELECT|people|bdate|age=?", 3) if err := row.Err(); err != nil { @@ -849,11 +996,10 @@ func testRowErr(t *testing.T) { } func TestTxRollbackCommitErr(t *testing.T) { - synctest.Test(t, testTxRollbackCommitErr) + testDatabase(t, testTxRollbackCommitErr) } -func testTxRollbackCommitErr(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testTxRollbackCommitErr(t *testing.T, db *DB) { + populate(t, 
db, "people") tx, err := db.Begin() if err != nil { @@ -883,11 +1029,10 @@ func testTxRollbackCommitErr(t *testing.T) { } func TestStatementErrorAfterClose(t *testing.T) { - synctest.Test(t, testStatementErrorAfterClose) + testDatabase(t, testStatementErrorAfterClose) } -func testStatementErrorAfterClose(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testStatementErrorAfterClose(t *testing.T, db *DB) { + populate(t, db, "people") stmt, err := db.Prepare("SELECT|people|age|name=?") if err != nil { t.Fatalf("Prepare: %v", err) @@ -904,11 +1049,10 @@ func testStatementErrorAfterClose(t *testing.T) { } func TestStatementQueryRow(t *testing.T) { - synctest.Test(t, testStatementQueryRow) + testDatabase(t, testStatementQueryRow) } -func testStatementQueryRow(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testStatementQueryRow(t *testing.T, db *DB) { + populate(t, db, "people") stmt, err := db.Prepare("SELECT|people|age|name=?") if err != nil { t.Fatalf("Prepare: %v", err) @@ -974,11 +1118,10 @@ func testStatementClose(t *testing.T) { // golang.org/issue/3734 func TestStatementQueryRowConcurrent(t *testing.T) { - synctest.Test(t, testStatementQueryRowConcurrent) + testDatabase(t, testStatementQueryRowConcurrent) } -func testStatementQueryRowConcurrent(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testStatementQueryRowConcurrent(t *testing.T, db *DB) { + populate(t, db, "people") stmt, err := db.Prepare("SELECT|people|age|name=?") if err != nil { t.Fatalf("Prepare: %v", err) @@ -1006,11 +1149,9 @@ func testStatementQueryRowConcurrent(t *testing.T) { // just a test of fakedb itself func TestBogusPreboundParameters(t *testing.T) { - synctest.Test(t, testBogusPreboundParameters) + testDatabase(t, testBogusPreboundParameters) } -func testBogusPreboundParameters(t *testing.T) { - db := newTestDB(t, "foo") - defer closeDB(t, db) +func testBogusPreboundParameters(t *testing.T, db *DB) { exec(t, 
db, "CREATE|t1|name=string,age=int32,dead=bool") _, err := db.Prepare("INSERT|t1|name=?,age=bogusconversion") if err == nil { @@ -1022,11 +1163,9 @@ func testBogusPreboundParameters(t *testing.T) { } func TestExec(t *testing.T) { - synctest.Test(t, testExec) + testDatabase(t, testExec) } -func testExec(t *testing.T) { - db := newTestDB(t, "foo") - defer closeDB(t, db) +func testExec(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32,dead=bool") stmt, err := db.Prepare("INSERT|t1|name=?,age=?") if err != nil { @@ -1067,11 +1206,9 @@ func testExec(t *testing.T) { } func TestTxPrepare(t *testing.T) { - synctest.Test(t, testTxPrepare) + testDatabase(t, testTxPrepare) } -func testTxPrepare(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testTxPrepare(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32,dead=bool") tx, err := db.Begin() if err != nil { @@ -1097,11 +1234,9 @@ func testTxPrepare(t *testing.T) { } func TestTxStmt(t *testing.T) { - synctest.Test(t, testTxStmt) + testDatabase(t, testTxStmt) } -func testTxStmt(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testTxStmt(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32,dead=bool") stmt, err := db.Prepare("INSERT|t1|name=?,age=?") if err != nil { @@ -1129,11 +1264,9 @@ func testTxStmt(t *testing.T) { } func TestTxStmtPreparedOnce(t *testing.T) { - synctest.Test(t, testTxStmtPreparedOnce) + testDatabase(t, testTxStmtPreparedOnce) } -func testTxStmtPreparedOnce(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testTxStmtPreparedOnce(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32") prepares0 := numPrepares(t, db) @@ -1176,11 +1309,9 @@ func testTxStmtPreparedOnce(t *testing.T) { } func TestTxStmtClosedRePrepares(t *testing.T) { - synctest.Test(t, testTxStmtClosedRePrepares) + testDatabase(t, testTxStmtClosedRePrepares) } -func testTxStmtClosedRePrepares(t *testing.T) { - db := 
newTestDB(t, "") - defer closeDB(t, db) +func testTxStmtClosedRePrepares(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32") prepares0 := numPrepares(t, db) @@ -1224,11 +1355,9 @@ func testTxStmtClosedRePrepares(t *testing.T) { } func TestParentStmtOutlivesTxStmt(t *testing.T) { - synctest.Test(t, testParentStmtOutlivesTxStmt) + testDatabase(t, testParentStmtOutlivesTxStmt) } -func testParentStmtOutlivesTxStmt(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testParentStmtOutlivesTxStmt(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32") // Make sure everything happens on the same connection. @@ -1278,11 +1407,9 @@ func testParentStmtOutlivesTxStmt(t *testing.T) { // associated with tx as argument re-prepares the same // statement again. func TestTxStmtFromTxStmtRePrepares(t *testing.T) { - synctest.Test(t, testTxStmtFromTxStmtRePrepares) + testDatabase(t, testTxStmtFromTxStmtRePrepares) } -func testTxStmtFromTxStmtRePrepares(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testTxStmtFromTxStmtRePrepares(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32") prepares0 := numPrepares(t, db) // db.Prepare increments numPrepares. @@ -1334,11 +1461,9 @@ func testTxStmtFromTxStmtRePrepares(t *testing.T) { // This test didn't fail before because we got lucky with the fakedb driver. 
// It was failing, and now not, in github.com/bradfitz/go-sql-test func TestTxQuery(t *testing.T) { - synctest.Test(t, testTxQuery) + testDatabase(t, testTxQuery) } -func testTxQuery(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testTxQuery(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32,dead=bool") exec(t, db, "INSERT|t1|name=Alice") @@ -1369,10 +1494,9 @@ func testTxQuery(t *testing.T) { } func TestTxQueryInvalid(t *testing.T) { - synctest.Test(t, testTxQueryInvalid) + testDatabase(t, testTxQueryInvalid) } -func testTxQueryInvalid(t *testing.T) { - db := newTestDB(t, "") +func testTxQueryInvalid(t *testing.T, db *DB) { defer closeDB(t, db) tx, err := db.Begin() @@ -1424,18 +1548,17 @@ func testTxErrBadConn(t *testing.T) { } func TestConnQuery(t *testing.T) { - synctest.Test(t, testConnQuery) + testDatabase(t, testConnQuery) } -func testConnQuery(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testConnQuery(t *testing.T, db *DB) { + populate(t, db, "people") ctx := t.Context() conn, err := db.Conn(ctx) if err != nil { t.Fatal(err) } - conn.dc.ci.(*fakeConn).skipDirtySession = true + getFakeConn(conn.dc.ci).skipDirtySession = true defer conn.Close() var name string @@ -1454,26 +1577,23 @@ func testConnQuery(t *testing.T) { } func TestConnRaw(t *testing.T) { - synctest.Test(t, testConnRaw) + testDatabase(t, testConnRaw) } -func testConnRaw(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testConnRaw(t *testing.T, db *DB) { + populate(t, db, "people") ctx := t.Context() conn, err := db.Conn(ctx) if err != nil { t.Fatal(err) } - conn.dc.ci.(*fakeConn).skipDirtySession = true + getFakeConn(conn.dc.ci).skipDirtySession = true defer conn.Close() sawFunc := false err = conn.Raw(func(dc any) error { sawFunc = true - if _, ok := dc.(*fakeConn); !ok { - return fmt.Errorf("got %T want *fakeConn", dc) - } + _ = getFakeConn(dc.(driver.Conn)) return nil }) if err != nil { @@ 
-1504,11 +1624,10 @@ func testConnRaw(t *testing.T) { } func TestCursorFake(t *testing.T) { - synctest.Test(t, testCursorFake) + testDatabase(t, testCursorFake) } -func testCursorFake(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testCursorFake(t *testing.T, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) defer cancel() @@ -1583,7 +1702,7 @@ func TestInvalidNilValues(t *testing.T) { if err != nil { t.Fatal(err) } - conn.dc.ci.(*fakeConn).skipDirtySession = true + getFakeConn(conn.dc.ci).skipDirtySession = true defer conn.Close() err = conn.QueryRowContext(ctx, "SELECT|people|bdate|age=?", 1).Scan(tt.input) @@ -1603,18 +1722,17 @@ func TestInvalidNilValues(t *testing.T) { } func TestConnTx(t *testing.T) { - synctest.Test(t, testConnTx) + testDatabase(t, testConnTx) } -func testConnTx(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testConnTx(t *testing.T, db *DB) { + populate(t, db, "people") ctx := t.Context() conn, err := db.Conn(ctx) if err != nil { t.Fatal(err) } - conn.dc.ci.(*fakeConn).skipDirtySession = true + getFakeConn(conn.dc.ci).skipDirtySession = true defer conn.Close() tx, err := conn.BeginTx(ctx, nil) @@ -1645,11 +1763,10 @@ func testConnTx(t *testing.T) { // is actually discarded and does not re-enter the connection pool. // If the IsValid method from *fakeConn is removed, this test will fail. 
func TestConnIsValid(t *testing.T) { - synctest.Test(t, testConnIsValid) + testDatabase(t, testConnIsValid, requireFeature("Validator")) } -func testConnIsValid(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testConnIsValid(t *testing.T, db *DB) { + populate(t, db, "people") db.SetMaxOpenConns(1) @@ -1661,7 +1778,7 @@ func testConnIsValid(t *testing.T) { } err = c.Raw(func(raw any) error { - dc := raw.(*fakeConn) + dc := getFakeConn(raw.(driver.Conn)) dc.stickyBad = true return nil }) @@ -1670,7 +1787,7 @@ func testConnIsValid(t *testing.T) { } c.Close() - if len(db.freeConn) > 0 && db.freeConn[0].ci.(*fakeConn).stickyBad { + if len(db.freeConn) > 0 && getFakeConn(db.freeConn[0].ci).stickyBad { t.Fatal("bad connection returned to pool; expected bad connection to be discarded") } } @@ -1678,10 +1795,10 @@ func testConnIsValid(t *testing.T) { // Tests fix for issue 2542, that we release a lock when querying on // a closed connection. func TestIssue2542Deadlock(t *testing.T) { - synctest.Test(t, testIssue2542Deadlock) + testDatabase(t, testIssue2542Deadlock) } -func testIssue2542Deadlock(t *testing.T) { - db := newTestDB(t, "people") +func testIssue2542Deadlock(t *testing.T, db *DB) { + populate(t, db, "people") closeDB(t, db) for i := 0; i < 2; i++ { _, err := db.Query("SELECT|people|age,name|") @@ -1693,11 +1810,10 @@ func testIssue2542Deadlock(t *testing.T) { // From golang.org/issue/3865 func TestCloseStmtBeforeRows(t *testing.T) { - synctest.Test(t, testCloseStmtBeforeRows) + testDatabase(t, testCloseStmtBeforeRows) } -func testCloseStmtBeforeRows(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testCloseStmtBeforeRows(t *testing.T, db *DB) { + populate(t, db, "people") s, err := db.Prepare("SELECT|people|name|") if err != nil { @@ -1721,11 +1837,9 @@ func testCloseStmtBeforeRows(t *testing.T) { // Tests fix for issue 2788, that we bind nil to a []byte if the // value in the column is sql null func 
TestNullByteSlice(t *testing.T) { - synctest.Test(t, testNullByteSlice) + testDatabase(t, testNullByteSlice) } -func testNullByteSlice(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testNullByteSlice(t *testing.T, db *DB) { exec(t, db, "CREATE|t|id=int32,name=nullstring") exec(t, db, "INSERT|t|id=10,name=?", nil) @@ -1750,11 +1864,9 @@ func testNullByteSlice(t *testing.T) { } func TestPointerParamsAndScans(t *testing.T) { - synctest.Test(t, testPointerParamsAndScans) + testDatabase(t, testPointerParamsAndScans) } -func testPointerParamsAndScans(t *testing.T) { - db := newTestDB(t, "") - defer closeDB(t, db) +func testPointerParamsAndScans(t *testing.T, db *DB) { exec(t, db, "CREATE|t|id=int32,name=nullstring") bob := "bob" @@ -1785,11 +1897,10 @@ func testPointerParamsAndScans(t *testing.T) { } func TestQueryRowClosingStmt(t *testing.T) { - synctest.Test(t, testQueryRowClosingStmt) + testDatabase(t, testQueryRowClosingStmt) } -func testQueryRowClosingStmt(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testQueryRowClosingStmt(t *testing.T, db *DB) { + populate(t, db, "people") var name string var age int err := db.QueryRow("SELECT|people|age,name|age=?", 3).Scan(&age, &name) @@ -1799,7 +1910,7 @@ func testQueryRowClosingStmt(t *testing.T) { if len(db.freeConn) != 1 { t.Fatalf("expected 1 free conn") } - fakeConn := db.freeConn[0].ci.(*fakeConn) + fakeConn := getFakeConn(db.freeConn[0].ci) if made, closed := fakeConn.stmtsMade, fakeConn.stmtsClosed; made != closed { t.Errorf("statement close mismatch: made %d, closed %d", made, closed) } @@ -1825,11 +1936,10 @@ func setRowsCloseHook(fn func(*Rows, *error)) { // Test issue 6651 func TestIssue6651(t *testing.T) { - synctest.Test(t, testIssue6651) + testDatabase(t, testIssue6651) } -func testIssue6651(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue6651(t *testing.T, db *DB) { + populate(t, db, "people") var v string @@ -1877,8 
+1987,8 @@ func TestNullStringParam(t *testing.T) { {NullString{"eel", false}, "", NullString{"", false}}, {"foo", NullString{"black", false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1891,8 +2001,8 @@ func TestGenericNullStringParam(t *testing.T) { {Null[string]{"eel", false}, "", Null[string]{"", false}}, {"foo", Null[string]{"black", false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1905,8 +2015,8 @@ func TestNullInt64Param(t *testing.T) { {NullInt64{222, false}, 1, NullInt64{0, false}}, {0, NullInt64{31, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1919,8 +2029,8 @@ func TestNullInt32Param(t *testing.T) { {NullInt32{222, false}, 1, NullInt32{0, false}}, {0, NullInt32{31, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1933,8 +2043,8 @@ func TestNullInt16Param(t *testing.T) { {NullInt16{222, false}, 1, NullInt16{0, false}}, {0, NullInt16{31, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1947,8 +2057,8 @@ func TestNullByteParam(t *testing.T) { {NullByte{222, false}, 1, NullByte{0, false}}, {0, NullByte{31, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1961,8 +2071,8 @@ func TestNullFloat64Param(t *testing.T) { {NullFloat64{222, false}, 1, NullFloat64{0, false}}, {10, NullFloat64{31.2, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, 
func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1975,8 +2085,8 @@ func TestNullBoolParam(t *testing.T) { {NullBool{true, false}, true, NullBool{false, false}}, {true, NullBool{true, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -1992,8 +2102,8 @@ func TestNullTimeParam(t *testing.T) { {NullTime{t1, false}, t2, NullTime{t0, false}}, {t2, NullTime{t1, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } @@ -2009,14 +2119,12 @@ func TestNullUUIDParam(t *testing.T) { {Null[uuid.UUID]{u1, false}, u2, Null[uuid.UUID]{u0, false}}, {u2, Null[uuid.UUID]{u1, false}, nil}, }} - synctest.Test(t, func(t *testing.T) { - nullTestRun(t, spec) + testDatabase(t, func(t *testing.T, db *DB) { + nullTestRun(t, spec, db) }) } -func nullTestRun(t *testing.T, spec nullTestSpec) { - db := newTestDB(t, "") - defer closeDB(t, db) +func nullTestRun(t *testing.T, spec nullTestSpec, db *DB) { exec(t, db, fmt.Sprintf("CREATE|t|id=int32,name=string,nullf=%s,notnullf=%s", spec.nullType, spec.notNullType)) // Inserts with db.Exec: @@ -2072,11 +2180,10 @@ func nullTestRun(t *testing.T, spec nullTestSpec) { // golang.org/issue/4859 func TestQueryRowNilScanDest(t *testing.T) { - synctest.Test(t, testQueryRowNilScanDest) + testDatabase(t, testQueryRowNilScanDest) } -func testQueryRowNilScanDest(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testQueryRowNilScanDest(t *testing.T, db *DB) { + populate(t, db, "people") var name *string // nil pointer err := db.QueryRow("SELECT|people|name|").Scan(name) want := `sql: Scan error on column index 0, name "name": destination pointer is nil` @@ -2086,11 +2193,10 @@ func testQueryRowNilScanDest(t *testing.T) { } func TestIssue4902(t *testing.T) { - synctest.Test(t, testIssue4902) + testDatabase(t, 
testIssue4902) } -func testIssue4902(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue4902(t *testing.T, db *DB) { + populate(t, db, "people") driver := db.Driver().(*fakeDriver) opens0 := driver.openCount @@ -2120,11 +2226,10 @@ func testIssue4902(t *testing.T) { // Issue 3857 // This used to deadlock. func TestSimultaneousQueries(t *testing.T) { - synctest.Test(t, testSimultaneousQueries) + testDatabase(t, testSimultaneousQueries) } -func testSimultaneousQueries(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testSimultaneousQueries(t *testing.T, db *DB) { + populate(t, db, "people") tx, err := db.Begin() if err != nil { @@ -2146,11 +2251,10 @@ func testSimultaneousQueries(t *testing.T) { } func TestMaxIdleConns(t *testing.T) { - synctest.Test(t, testMaxIdleConns) + testDatabase(t, testMaxIdleConns) } -func testMaxIdleConns(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testMaxIdleConns(t *testing.T, db *DB) { + populate(t, db, "people") tx, err := db.Begin() if err != nil { @@ -2178,9 +2282,9 @@ func testMaxIdleConns(t *testing.T) { } func TestMaxOpenConns(t *testing.T) { - synctest.Test(t, testMaxOpenConns) + testDatabase(t, testMaxOpenConns) } -func testMaxOpenConns(t *testing.T) { +func testMaxOpenConns(t *testing.T, db *DB) { if testing.Short() { t.Skip("skipping in short mode") } @@ -2191,8 +2295,7 @@ func testMaxOpenConns(t *testing.T) { } }) - db := newTestDB(t, "magicquery") - defer closeDB(t, db) + populate(t, db, "magicquery") driver := db.Driver().(*fakeDriver) @@ -2297,9 +2400,9 @@ func testMaxOpenConns(t *testing.T) { // Issue 9453: tests that SetMaxOpenConns can be lowered at runtime // and affects the subsequent release of connections. 
func TestMaxOpenConnsOnBusy(t *testing.T) { - synctest.Test(t, testMaxOpenConnsOnBusy) + testDatabase(t, testMaxOpenConnsOnBusy) } -func testMaxOpenConnsOnBusy(t *testing.T) { +func testMaxOpenConnsOnBusy(t *testing.T, db *DB) { defer setHookpostCloseConn(nil) setHookpostCloseConn(func(_ *fakeConn, err error) { if err != nil { @@ -2307,8 +2410,7 @@ func testMaxOpenConnsOnBusy(t *testing.T) { } }) - db := newTestDB(t, "magicquery") - defer closeDB(t, db) + populate(t, db, "magicquery") db.SetMaxOpenConns(3) @@ -2437,11 +2539,10 @@ func testPendingConnsAfterErr(t *testing.T) { } func TestSingleOpenConn(t *testing.T) { - synctest.Test(t, testSingleOpenConn) + testDatabase(t, testSingleOpenConn) } -func testSingleOpenConn(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testSingleOpenConn(t *testing.T, db *DB) { + populate(t, db, "people") db.SetMaxOpenConns(1) @@ -2463,10 +2564,10 @@ func testSingleOpenConn(t *testing.T) { } func TestStats(t *testing.T) { - synctest.Test(t, testStats) + testDatabase(t, testStats) } -func testStats(t *testing.T) { - db := newTestDB(t, "people") +func testStats(t *testing.T, db *DB) { + populate(t, db, "people") stats := db.Stats() if got := stats.OpenConnections; got != 1 { t.Errorf("stats.OpenConnections = %d; want 1", got) @@ -2486,11 +2587,10 @@ func testStats(t *testing.T) { } func TestConnMaxLifetime(t *testing.T) { - synctest.Test(t, testConnMaxLifetime) + testDatabase(t, testConnMaxLifetime) } -func testConnMaxLifetime(t *testing.T) { - db := newTestDB(t, "magicquery") - defer closeDB(t, db) +func testConnMaxLifetime(t *testing.T, db *DB) { + populate(t, db, "magicquery") driver := db.Driver().(*fakeDriver) @@ -2571,9 +2671,9 @@ func testConnMaxLifetime(t *testing.T) { // golang.org/issue/5323 func TestStmtCloseDeps(t *testing.T) { - synctest.Test(t, testStmtCloseDeps) + testDatabase(t, testStmtCloseDeps) } -func testStmtCloseDeps(t *testing.T) { +func testStmtCloseDeps(t *testing.T, db *DB) { if 
testing.Short() { t.Skip("skipping in short mode") } @@ -2584,8 +2684,7 @@ func testStmtCloseDeps(t *testing.T) { } }) - db := newTestDB(t, "magicquery") - defer closeDB(t, db) + populate(t, db, "magicquery") driver := db.Driver().(*fakeDriver) @@ -2668,11 +2767,10 @@ func testStmtCloseDeps(t *testing.T) { // golang.org/issue/5046 func TestCloseConnBeforeStmts(t *testing.T) { - synctest.Test(t, testCloseConnBeforeStmts) + testDatabase(t, testCloseConnBeforeStmts) } -func testCloseConnBeforeStmts(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testCloseConnBeforeStmts(t *testing.T, db *DB) { + populate(t, db, "people") defer setHookpostCloseConn(nil) setHookpostCloseConn(func(_ *fakeConn, err error) { @@ -2726,11 +2824,10 @@ func testCloseConnBeforeStmts(t *testing.T) { // golang.org/issue/5283: don't release the Rows' connection in Close // before calling Stmt.Close. func TestRowsCloseOrder(t *testing.T) { - synctest.Test(t, testRowsCloseOrder) + testDatabase(t, testRowsCloseOrder) } -func testRowsCloseOrder(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testRowsCloseOrder(t *testing.T, db *DB) { + populate(t, db, "people") db.SetMaxIdleConns(0) setStrictFakeConnClose(t) @@ -2747,11 +2844,10 @@ func testRowsCloseOrder(t *testing.T) { } func TestRowsImplicitClose(t *testing.T) { - synctest.Test(t, testRowsImplicitClose) + testDatabase(t, testRowsImplicitClose) } -func testRowsImplicitClose(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testRowsImplicitClose(t *testing.T, db *DB) { + populate(t, db, "people") rows, err := db.Query("SELECT|people|age,name|") if err != nil { @@ -2778,11 +2874,10 @@ func testRowsImplicitClose(t *testing.T) { } func TestRowsCloseError(t *testing.T) { - synctest.Test(t, testRowsCloseError) + testDatabase(t, testRowsCloseError) } -func testRowsCloseError(t *testing.T) { - db := newTestDB(t, "people") - defer db.Close() +func testRowsCloseError(t 
*testing.T, db *DB) { + populate(t, db, "people") rows, err := db.Query("SELECT|people|age,name|") if err != nil { t.Fatalf("Query: %v", err) @@ -2814,11 +2909,10 @@ func testRowsCloseError(t *testing.T) { } func TestStmtCloseOrder(t *testing.T) { - synctest.Test(t, testStmtCloseOrder) + testDatabase(t, testStmtCloseOrder) } -func testStmtCloseOrder(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testStmtCloseOrder(t *testing.T, db *DB) { + populate(t, db, "people") db.SetMaxIdleConns(0) setStrictFakeConnClose(t) @@ -2866,7 +2960,7 @@ func testManyErrBadConn(t *testing.T) { } for _, conn := range db.freeConn { conn.Lock() - conn.ci.(*fakeConn).stickyBad = true + getFakeConn(conn.ci).stickyBad = true conn.Unlock() } return db @@ -2956,7 +3050,7 @@ func testManyErrBadConn(t *testing.T) { if err != nil { t.Fatal(err) } - conn.dc.ci.(*fakeConn).skipDirtySession = true + getFakeConn(conn.dc.ci).skipDirtySession = true err = conn.Close() if err != nil { t.Fatal(err) @@ -2973,11 +3067,10 @@ func testManyErrBadConn(t *testing.T) { // Issue 34775: Ensure that a Tx cannot commit after a rollback. func TestTxCannotCommitAfterRollback(t *testing.T) { - synctest.Test(t, testTxCannotCommitAfterRollback) + testDatabase(t, testTxCannotCommitAfterRollback) } -func testTxCannotCommitAfterRollback(t *testing.T) { - db := newTestDB(t, "tx_status") - defer closeDB(t, db) +func testTxCannotCommitAfterRollback(t *testing.T, db *DB) { + populate(t, db, "tx_status") // First check query reporting is correct. var txStatus string @@ -3044,11 +3137,10 @@ func testTxCannotCommitAfterRollback(t *testing.T) { // Issue 40985 transaction statement deadlock while context cancel. 
func TestTxStmtDeadlock(t *testing.T) { - synctest.Test(t, testTxStmtDeadlock) + testDatabase(t, testTxStmtDeadlock) } -func testTxStmtDeadlock(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testTxStmtDeadlock(t *testing.T, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -3132,7 +3224,7 @@ func TestConnExpiresFreshOutOfPool(t *testing.T) { synctest.Sleep(11 * time.Second) - conn.ci.(*fakeConn).stickyBad = ec.badReset + getFakeConn(conn.ci).stickyBad = ec.badReset db.putConn(conn, err, true) @@ -3144,11 +3236,10 @@ func TestConnExpiresFreshOutOfPool(t *testing.T) { // TestIssue20575 ensures the Rows from query does not block // closing a transaction. Ensure Rows is closed while closing a transaction. func TestIssue20575(t *testing.T) { - synctest.Test(t, testIssue20575) + testDatabase(t, testIssue20575) } -func testIssue20575(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue20575(t *testing.T, db *DB) { + populate(t, db, "people") tx, err := db.Begin() if err != nil { @@ -3175,11 +3266,10 @@ func testIssue20575(t *testing.T) { // TestIssue20622 tests closing the transaction before rows is closed, requires // the race detector to fail. 
func TestIssue20622(t *testing.T) { - synctest.Test(t, testIssue20622) + testDatabase(t, testIssue20622) } -func testIssue20622(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue20622(t *testing.T, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -3214,11 +3304,9 @@ func testIssue20622(t *testing.T) { // golang.org/issue/5718 func TestErrBadConnReconnect(t *testing.T) { - synctest.Test(t, testErrBadConnReconnect) + testDatabase(t, testErrBadConnReconnect) } -func testErrBadConnReconnect(t *testing.T) { - db := newTestDB(t, "foo") - defer closeDB(t, db) +func testErrBadConnReconnect(t *testing.T, db *DB) { exec(t, db, "CREATE|t1|name=string,age=int32,dead=bool") simulateBadConn := func(name string, hook *func() bool, op func() error) { @@ -3323,11 +3411,9 @@ func testErrBadConnReconnect(t *testing.T) { // golang.org/issue/11264 func TestTxEndBadConn(t *testing.T) { - synctest.Test(t, testTxEndBadConn) + testDatabase(t, testTxEndBadConn) } -func testTxEndBadConn(t *testing.T) { - db := newTestDB(t, "foo") - defer closeDB(t, db) +func testTxEndBadConn(t *testing.T, db *DB) { db.SetMaxIdleConns(0) exec(t, db, "CREATE|t1|name=string,age=int32,dead=bool") db.SetMaxIdleConns(1) @@ -3740,11 +3826,10 @@ func doConcurrentTest(t testing.TB, ct concurrentTest) { } func TestIssue6081(t *testing.T) { - synctest.Test(t, testIssue6081) + testDatabase(t, testIssue6081) } -func testIssue6081(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue6081(t *testing.T, db *DB) { + populate(t, db, "people") drv := db.Driver().(*fakeDriver) drv.mu.Lock() @@ -3799,11 +3884,10 @@ func testIssue6081(t *testing.T) { // The addition of calling rows.Next also tests // Issue 21117. 
func TestIssue18429(t *testing.T) { - synctest.Test(t, testIssue18429) + testDatabase(t, testIssue18429) } -func testIssue18429(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue18429(t *testing.T, db *DB) { + populate(t, db, "people") ctx := context.Background() sem := make(chan bool, 20) @@ -3851,11 +3935,10 @@ func testIssue18429(t *testing.T) { // TestIssue20160 attempts to test a short context life on a stmt Query. func TestIssue20160(t *testing.T) { - synctest.Test(t, testIssue20160) + testDatabase(t, testIssue20160) } -func testIssue20160(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue20160(t *testing.T, db *DB) { + populate(t, db, "people") ctx := context.Background() sem := make(chan bool, 20) @@ -3898,11 +3981,10 @@ func testIssue20160(t *testing.T) { // // See https://golang.org/cl/35550 . func TestIssue18719(t *testing.T) { - synctest.Test(t, testIssue18719) + testDatabase(t, testIssue18719, requireFeature("BeginTx")) } -func testIssue18719(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue18719(t *testing.T, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -3937,11 +4019,10 @@ func testIssue18719(t *testing.T) { } func TestIssue20647(t *testing.T) { - synctest.Test(t, testIssue20647) + testDatabase(t, testIssue20647) } -func testIssue20647(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testIssue20647(t *testing.T, db *DB) { + populate(t, db, "people") ctx := t.Context() @@ -3949,7 +4030,7 @@ func testIssue20647(t *testing.T) { if err != nil { t.Fatal(err) } - conn.dc.ci.(*fakeConn).skipDirtySession = true + getFakeConn(conn.dc.ci).skipDirtySession = true defer conn.Close() stmt, err := conn.PrepareContext(ctx, "SELECT|people|name|") @@ -3998,11 +4079,10 @@ func TestConcurrency(t *testing.T) { } func TestConnectionLeak(t *testing.T) { - synctest.Test(t, 
testConnectionLeak) + testDatabase(t, testConnectionLeak) } -func testConnectionLeak(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testConnectionLeak(t *testing.T, db *DB) { + populate(t, db, "people") // Start by opening defaultMaxIdleConns rows := make([]*Rows, defaultMaxIdleConns) // We need to SetMaxOpenConns > MaxIdleConns, so the DB can open @@ -4053,11 +4133,10 @@ func testConnectionLeak(t *testing.T) { } func TestStatsMaxIdleClosedZero(t *testing.T) { - synctest.Test(t, testStatsMaxIdleClosedZero) + testDatabase(t, testStatsMaxIdleClosedZero) } -func testStatsMaxIdleClosedZero(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testStatsMaxIdleClosedZero(t *testing.T, db *DB) { + populate(t, db, "people") db.SetMaxOpenConns(1) db.SetMaxIdleConns(1) @@ -4082,11 +4161,10 @@ func testStatsMaxIdleClosedZero(t *testing.T) { } func TestStatsMaxIdleClosedTen(t *testing.T) { - synctest.Test(t, testStatsMaxIdleClosedTen) + testDatabase(t, testStatsMaxIdleClosedTen) } -func testStatsMaxIdleClosedTen(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testStatsMaxIdleClosedTen(t *testing.T, db *DB) { + populate(t, db, "people") db.SetMaxOpenConns(1) db.SetMaxIdleConns(0) @@ -4240,7 +4318,7 @@ type nvcDriver struct { func (d *nvcDriver) Open(dsn string) (driver.Conn, error) { c, err := d.fakeDriver.Open(dsn) - fc := c.(*fakeConn) + fc := getFakeConn(c) fc.db.allowAny = true return &nvcConn{fc, d.skipNamedValueCheck}, err } @@ -4394,7 +4472,7 @@ func (d *ctxOnlyDriver) Open(dsn string) (driver.Conn, error) { if err != nil { return nil, err } - return &ctxOnlyConn{fc: conn.(*fakeConn)}, nil + return &ctxOnlyConn{fc: getFakeConn(conn)}, nil } var ( @@ -4519,11 +4597,10 @@ func (alwaysErrScanner) Scan(any) error { // Issue 38099: Ensure that Rows.Scan properly wraps underlying errors. 
func TestRowsScanProperlyWrapsErrors(t *testing.T) { - synctest.Test(t, testRowsScanProperlyWrapsErrors) + testDatabase(t, testRowsScanProperlyWrapsErrors) } -func testRowsScanProperlyWrapsErrors(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testRowsScanProperlyWrapsErrors(t *testing.T, db *DB) { + populate(t, db, "people") rows, err := db.Query("SELECT|people|age|") if err != nil { @@ -4676,11 +4753,10 @@ func testContextCancelDuringRawBytesScan(t *testing.T, mode string) { } func TestContextCancelBetweenNextAndErr(t *testing.T) { - synctest.Test(t, testContextCancelBetweenNextAndErr) + testDatabase(t, testContextCancelBetweenNextAndErr) } -func testContextCancelBetweenNextAndErr(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testContextCancelBetweenNextAndErr(t *testing.T, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -4704,11 +4780,10 @@ type testScanner struct { func (ts testScanner) Scan(src any) error { return ts.scanf(src) } func TestContextCancelDuringScan(t *testing.T) { - synctest.Test(t, testContextCancelDuringScan) + testDatabase(t, testContextCancelDuringScan) } -func testContextCancelDuringScan(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testContextCancelDuringScan(t *testing.T, db *DB) { + populate(t, db, "people") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -4755,11 +4830,10 @@ func testContextCancelDuringScan(t *testing.T) { } func TestNilErrorAfterClose(t *testing.T) { - synctest.Test(t, testNilErrorAfterClose) + testDatabase(t, testNilErrorAfterClose) } -func testNilErrorAfterClose(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testNilErrorAfterClose(t *testing.T, db *DB) { + populate(t, db, "people") // This WithCancel is important; Rows contains an optimization to avoid // spawning a goroutine when the query/transaction context 
cannot be @@ -4787,11 +4861,10 @@ func testNilErrorAfterClose(t *testing.T) { // If a RawBytes is reused across multiple queries, // subsequent queries shouldn't overwrite driver-owned memory from previous queries. func TestRawBytesReuse(t *testing.T) { - synctest.Test(t, testRawBytesReuse) + testDatabase(t, testRawBytesReuse) } -func testRawBytesReuse(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testRawBytesReuse(t *testing.T, db *DB) { + populate(t, db, "people") var raw RawBytes @@ -4928,11 +5001,10 @@ func testPing(t *testing.T) { // Issue 18101. func TestTypedString(t *testing.T) { - synctest.Test(t, testTypedString) + testDatabase(t, testTypedString) } -func testTypedString(t *testing.T) { - db := newTestDB(t, "people") - defer closeDB(t, db) +func testTypedString(t *testing.T, db *DB) { + populate(t, db, "people") type Str string var scanned Str diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go index 8527ff28967cc6..93f0e06eb9fc86 100644 --- a/test/codegen/condmove.go +++ b/test/codegen/condmove.go @@ -6,7 +6,10 @@ package codegen -import "crypto/subtle" +import ( + "crypto/subtle" + "math/bits" +) func cmovint(c int) int { x := c + 4 @@ -808,3 +811,33 @@ func constantTimeSelect(v, x, y int) int { // riscv64/rva23u64:"CZERONEZ" "CZEROEQZ" "OR" -"SNEZ" -"NEG" -"AND" return subtle.ConstantTimeSelect(v, x, y) } + +func issue76056fieldReduceOnceSub32(a uint32) uint32 { + const q = 8380417 // 2²³ - 2¹³ + 1 + // FIXME: the compiler struggles with Sub32 since it's not intrinsified. + x, b := bits.Sub32(a, q, 0) + // FIXME: prove doesn't rewrite this multiply to a condselect because it doesn't know that b is always 0 or 1. + return x + b*q +} + +func issue76056fieldReduceOnce2Sub32(a uint32) uint32 { + const q = 8380417 // 2²³ - 2¹³ + 1 + // FIXME: the compiler struggles with Sub32 since it's not intrinsified. 
+ x, b := bits.Sub32(a, q, 0) + return uint32(subtle.ConstantTimeSelect(int(b), int(a), int(x))) +} + +func issue76056fieldReduceOnceSub64(a uint32) uint32 { + const q = 8380417 // 2²³ - 2¹³ + 1 + x, b := bits.Sub64(uint64(a), q, 0) + // FIXME: prove doesn't rewrite this multiply to a condselect because it doesn't know that b is always 0 or 1. + return uint32(x) + uint32(b)*q +} + +func issue76056fieldReduceOnce2Sub64(a uint32) uint32 { + const q = 8380417 // 2²³ - 2¹³ + 1 + // amd64:"SUB" -"TEST" -"SBB" + x, b := bits.Sub64(uint64(a), q, 0) + // amd64:"CMOV" -"TEST" -"SBB" + return uint32(subtle.ConstantTimeSelect(int(b), int(a), int(x))) +}