[llvm] f8ed709 - [MachineCombiner] Extend reassociation logic to handle inverse instructions
Anton Sidorenko via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 7 02:51:26 PST 2022
Author: Anton Sidorenko
Date: 2022-12-07T13:50:28+03:00
New Revision: f8ed7093452aabd71650a3bb5dbca942815f9563
URL: https://github.com/llvm/llvm-project/commit/f8ed7093452aabd71650a3bb5dbca942815f9563
DIFF: https://github.com/llvm/llvm-project/commit/f8ed7093452aabd71650a3bb5dbca942815f9563.diff
LOG: [MachineCombiner] Extend reassociation logic to handle inverse instructions
The machine combiner supports generic reassociation only of associative and
commutative instructions, for example (A + X) + Y => (X + Y) + A. However, we
can extend this generic support to handle patterns like
(X + A) - Y => (X - Y) + A, where `-` is the inverse of `+`.
This patch adds interface functions so that reassociation patterns of
associative/commutative instructions and their inverse variants can be handled
with minimal changes in the backends.
Differential Revision: https://reviews.llvm.org/D136754
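
For a backend, opting in amounts to overriding the two new hooks: getInverseOpcode
reports which opcode undoes which, and isAssociativeAndCommutative additionally
answers for the inverse operation when Invert is set; the generic
getReassociationOpcodes/reassociateOps machinery then selects the new root/prev
opcodes and operand order. Below is a minimal sketch of such an override — the
"MyTarget" backend, its MyTargetInstrInfo.h header, and the FADD_D/FSUB_D opcode
names are hypothetical placeholders, not part of this patch.

    // Sketch of a backend opting in to inverse-aware reassociation.
    // "MyTarget" and the FADD_D/FSUB_D opcodes are hypothetical placeholders.
    #include "MyTargetInstrInfo.h"
    #include <optional>

    using namespace llvm;

    // Answer for the instruction itself, or for its inverse when Invert is set.
    bool MyTargetInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                                        bool Invert) const {
      unsigned Opc = Inst.getOpcode();
      if (Invert) {
        // Test the inverse operation instead of Inst's own opcode.
        std::optional<unsigned> InverseOpc = getInverseOpcode(Opc);
        if (!InverseOpc)
          return false;
        Opc = *InverseOpc;
      }
      // FP reassociation is only legal with the reassoc and nsz fast-math flags.
      return Opc == MyTarget::FADD_D &&
             Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
             Inst.getFlag(MachineInstr::MIFlag::FmNsz);
    }

    // Tell the generic combiner which opcode undoes which, so it can rewrite
    // e.g. (X + A) - Y into (X - Y) + A via getReassociationOpcodes().
    std::optional<unsigned>
    MyTargetInstrInfo::getInverseOpcode(unsigned Opcode) const {
      switch (Opcode) {
      case MyTarget::FADD_D:
        return MyTarget::FSUB_D;
      case MyTarget::FSUB_D:
        return MyTarget::FADD_D;
      default:
        return std::nullopt;
      }
    }

With these two overrides in place, isReassociationCandidate() and
hasReassociableSibling() start accepting the inverse opcode as well, and the
generic reassociateOps() performs the actual rewrite.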
Added:
Modified:
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/TargetInstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrInfo.h
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
llvm/test/CodeGen/AArch64/arm64-rev.ll
llvm/test/CodeGen/AArch64/machine-combiner.ll
llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
llvm/test/CodeGen/PowerPC/machine-combiner.ll
llvm/test/CodeGen/RISCV/machine-combiner.ll
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/test/CodeGen/X86/horizontal-sum.ll
llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
llvm/test/CodeGen/X86/machine-combiner.ll
llvm/test/CodeGen/X86/madd.ll
llvm/test/CodeGen/X86/masked_gather_scatter.ll
llvm/test/CodeGen/X86/mul-constant-i64.ll
llvm/test/CodeGen/X86/pr46877.ll
llvm/test/CodeGen/X86/sqrt-fastmath.ll
llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/X86/x86-interleaved-access.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 3aa8c576f01d6..ed6371b654114 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1170,11 +1170,22 @@ class TargetInstrInfo : public MCInstrInfo {
/// will be set to true.
bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const;
- /// Return true when \P Inst is both associative and commutative.
- virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ /// Return true when \P Inst is both associative and commutative. If \P Invert
+ /// is true, then the inverse of \P Inst operation must be tested.
+ virtual bool isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert = false) const {
return false;
}
+ /// Return the inverse operation opcode if it exists for \P Opcode (e.g. add
+ /// for sub and vice versa).
+ virtual std::optional<unsigned> getInverseOpcode(unsigned Opcode) const {
+ return std::nullopt;
+ }
+
+ /// Return true when \P Opcode1 or its inversion is equal to \P Opcode2.
+ bool areOpcodesEqualOrInverse(unsigned Opcode1, unsigned Opcode2) const;
+
/// Return true when \P Inst has reassociable operands in the same \P MBB.
virtual bool hasReassociableOperands(const MachineInstr &Inst,
const MachineBasicBlock *MBB) const;
@@ -1207,6 +1218,15 @@ class TargetInstrInfo : public MCInstrInfo {
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
+ /// Reassociation of some instructions requires inverse operations (e.g.
+ /// (X + A) - Y => (X - Y) + A). This method returns a pair of new opcodes
+ /// (new root opcode, new prev opcode) that must be used to reassociate \P
+ /// Root and \P Prev according to \P Pattern.
+ std::pair<unsigned, unsigned>
+ getReassociationOpcodes(MachineCombinerPattern Pattern,
+ const MachineInstr &Root,
+ const MachineInstr &Prev) const;
+
/// The limit on resource length extension we accept in MachineCombiner Pass.
virtual int getExtendResourceLenLimit() const { return 0; }
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index d15369f09e480..66b35c6a01ee9 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -715,39 +715,50 @@ bool TargetInstrInfo::hasReassociableOperands(
return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB;
}
+bool TargetInstrInfo::areOpcodesEqualOrInverse(unsigned Opcode1,
+ unsigned Opcode2) const {
+ return Opcode1 == Opcode2 || getInverseOpcode(Opcode1) == Opcode2;
+}
+
bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
bool &Commuted) const {
const MachineBasicBlock *MBB = Inst.getParent();
const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
- unsigned AssocOpcode = Inst.getOpcode();
+ unsigned Opcode = Inst.getOpcode();
- // If only one operand has the same opcode and it's the second source operand,
- // the operands must be commuted.
- Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
+ // If only one operand has the same or inverse opcode and it's the second
+ // source operand, the operands must be commuted.
+ Commuted = !areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
+ areOpcodesEqualOrInverse(Opcode, MI2->getOpcode());
if (Commuted)
std::swap(MI1, MI2);
// 1. The previous instruction must be the same type as Inst.
- // 2. The previous instruction must also be associative/commutative (this can
- // be different even for instructions with the same opcode if traits like
- // fast-math-flags are included).
+ // 2. The previous instruction must also be associative/commutative or be the
+ // inverse of such an operation (this can be different even for
+ // instructions with the same opcode if traits like fast-math-flags are
+ // included).
// 3. The previous instruction must have virtual register definitions for its
// operands in the same basic block as Inst.
// 4. The previous instruction's result must only be used by Inst.
- return MI1->getOpcode() == AssocOpcode && isAssociativeAndCommutative(*MI1) &&
+ return areOpcodesEqualOrInverse(Opcode, MI1->getOpcode()) &&
+ (isAssociativeAndCommutative(*MI1) ||
+ isAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
hasReassociableOperands(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
}
-// 1. The operation must be associative and commutative.
+// 1. The operation must be associative and commutative or be the inverse of
+// such an operation.
// 2. The instruction must have virtual register definitions for its
// operands in the same basic block.
// 3. The instruction must have a reassociable sibling.
bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst,
bool &Commuted) const {
- return isAssociativeAndCommutative(Inst) &&
+ return (isAssociativeAndCommutative(Inst) ||
+ isAssociativeAndCommutative(Inst, /* Invert */ true)) &&
hasReassociableOperands(Inst, Inst.getParent()) &&
hasReassociableSibling(Inst, Commuted);
}
@@ -801,6 +812,111 @@ TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
return false;
}
+std::pair<unsigned, unsigned>
+TargetInstrInfo::getReassociationOpcodes(MachineCombinerPattern Pattern,
+ const MachineInstr &Root,
+ const MachineInstr &Prev) const {
+ bool AssocCommutRoot = isAssociativeAndCommutative(Root);
+ bool AssocCommutPrev = isAssociativeAndCommutative(Prev);
+
+ // Early exit if both opcodes are associative and commutative. It's a trivial
+ // reassociation when we only change operands order. In this case opcodes are
+ // not required to have inverse versions.
+ if (AssocCommutRoot && AssocCommutPrev) {
+ assert(Root.getOpcode() == Prev.getOpcode() && "Expected to be equal");
+ return std::make_pair(Root.getOpcode(), Root.getOpcode());
+ }
+
+ // At least one instruction is not associative or commutative.
+ // Since we have matched one of the reassociation patterns, we expect that the
+ // instructions' opcodes are equal or one of them is the inversion of the
+ // other.
+ assert(areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()) &&
+ "Incorrectly matched pattern");
+ unsigned AssocCommutOpcode = Root.getOpcode();
+ unsigned InverseOpcode = getInverseOpcode(Root.getOpcode()).value();
+ if (!AssocCommutRoot)
+ std::swap(AssocCommutOpcode, InverseOpcode);
+
+ // The transformation rule (`+` is any associative and commutative binary
+ // operation, `-` is the inverse):
+ // REASSOC_AX_BY:
+ // (A + X) + Y => A + (X + Y)
+ // (A + X) - Y => A + (X - Y)
+ // (A - X) + Y => A - (X - Y)
+ // (A - X) - Y => A - (X + Y)
+ // REASSOC_XA_BY:
+ // (X + A) + Y => (X + Y) + A
+ // (X + A) - Y => (X - Y) + A
+ // (X - A) + Y => (X + Y) - A
+ // (X - A) - Y => (X - Y) - A
+ // REASSOC_AX_YB:
+ // Y + (A + X) => (Y + X) + A
+ // Y - (A + X) => (Y - X) - A
+ // Y + (A - X) => (Y - X) + A
+ // Y - (A - X) => (Y + X) - A
+ // REASSOC_XA_YB:
+ // Y + (X + A) => (Y + X) + A
+ // Y - (X + A) => (Y - X) - A
+ // Y + (X - A) => (Y + X) - A
+ // Y - (X - A) => (Y - X) + A
+ switch (Pattern) {
+ default:
+ llvm_unreachable("Unexpected pattern");
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ break;
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ if (!AssocCommutRoot && AssocCommutPrev)
+ return {InverseOpcode, InverseOpcode};
+ if (AssocCommutRoot && !AssocCommutPrev)
+ return {InverseOpcode, AssocCommutOpcode};
+ if (!AssocCommutRoot && !AssocCommutPrev)
+ return {AssocCommutOpcode, InverseOpcode};
+ break;
+ }
+ llvm_unreachable("Unhandled combination");
+}
+
+// Return a pair of boolean flags showing if the new root and new prev operands
+// must be swapped. See visual example of the rule in
+// TargetInstrInfo::getReassociationOpcodes.
+static std::pair<bool, bool> mustSwapOperands(MachineCombinerPattern Pattern) {
+ switch (Pattern) {
+ default:
+ llvm_unreachable("Unexpected pattern");
+ case MachineCombinerPattern::REASSOC_AX_BY:
+ return {false, false};
+ case MachineCombinerPattern::REASSOC_XA_BY:
+ return {true, false};
+ case MachineCombinerPattern::REASSOC_AX_YB:
+ return {true, true};
+ case MachineCombinerPattern::REASSOC_XA_YB:
+ return {true, true};
+ }
+}
+
/// Attempt the reassociation transformation to reduce critical path length.
/// See the above comments before getMachineCombinerPatterns().
void TargetInstrInfo::reassociateOps(
@@ -863,21 +979,35 @@ void TargetInstrInfo::reassociateOps(
Register NewVR = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- unsigned Opcode = Root.getOpcode();
+ auto [NewRootOpc, NewPrevOpc] = getReassociationOpcodes(Pattern, Root, Prev);
bool KillA = OpA.isKill();
bool KillX = OpX.isKill();
bool KillY = OpY.isKill();
+ bool KillNewVR = true;
+
+ auto [SwapRootOperands, SwapPrevOperands] = mustSwapOperands(Pattern);
+
+ if (SwapPrevOperands) {
+ std::swap(RegX, RegY);
+ std::swap(KillX, KillY);
+ }
// Create new instructions for insertion.
MachineInstrBuilder MIB1 =
- BuildMI(*MF, MIMetadata(Prev), TII->get(Opcode), NewVR)
+ BuildMI(*MF, MIMetadata(Prev), TII->get(NewPrevOpc), NewVR)
.addReg(RegX, getKillRegState(KillX))
.addReg(RegY, getKillRegState(KillY))
.setMIFlags(Prev.getFlags());
+
+ if (SwapRootOperands) {
+ std::swap(RegA, NewVR);
+ std::swap(KillA, KillNewVR);
+ }
+
MachineInstrBuilder MIB2 =
- BuildMI(*MF, MIMetadata(Root), TII->get(Opcode), RegC)
+ BuildMI(*MF, MIMetadata(Root), TII->get(NewRootOpc), RegC)
.addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(true))
+ .addReg(NewVR, getKillRegState(KillNewVR))
.setMIFlags(Root.getFlags());
setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6b8bc31992736..96e05c559b5f1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4939,8 +4939,10 @@ static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
-bool AArch64InstrInfo::isAssociativeAndCommutative(
- const MachineInstr &Inst) const {
+bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const {
+ if (Invert)
+ return false;
switch (Inst.getOpcode()) {
case AArch64::FADDDrr:
case AArch64::FADDSrr:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 604674d401335..f5d866df271c8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -260,8 +260,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
SmallVectorImpl<MachineCombinerPattern> &Patterns,
bool DoRegPressureReduce) const override;
/// Return true when Inst is associative and commutative so that it can be
- /// reassociated.
- bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ /// reassociated. If Invert is true, then the inverse of Inst operation must
+ /// be checked.
+ bool isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const override;
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0c6d3a2ea4339..845f7ade39908 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -251,7 +251,10 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
// reduce the critical path. Mostly, this means floating-point operations,
// because they have high latencies(>=5) (compared to other operations, such as
// and/or, which are also associative and commutative, but have low latencies).
-bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const {
+ if (Invert)
+ return false;
switch (Inst.getOpcode()) {
// Floating point:
// FP Add:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index ab2abb93ec098..da983b1ec6caa 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -498,7 +498,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
SmallVectorImpl<MachineInstr *> &InsInstrs) const override;
- bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ bool isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const override;
/// On PowerPC, we try to reassociate FMA chain which will increase
/// instruction size. Set extension resource length limit to 1 for edge case.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index cc1858d23a181..5104bfc993333 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1200,9 +1200,12 @@ bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
return RISCV::hasEqualFRM(Inst, Sibling);
}
-bool RISCVInstrInfo::isAssociativeAndCommutative(
- const MachineInstr &Inst) const {
+bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const {
unsigned Opc = Inst.getOpcode();
+ if (Invert)
+ return false;
+
if (isFADD(Opc) || isFMUL(Opc))
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index fb8e8bf9883de..eac190f7c99df 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -205,7 +205,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
bool hasReassociableSibling(const MachineInstr &Inst,
bool &Commuted) const override;
- bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ bool isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const override;
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index eb2cf174dc997..d12955b4f3139 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8716,7 +8716,10 @@ bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
-bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const {
+ if (Invert)
+ return false;
switch (Inst.getOpcode()) {
case X86::ADD8rr:
case X86::ADD16rr:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index bf86e5db9bd6b..64284e1e73ee1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -506,7 +506,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
bool useMachineCombiner() const override { return true; }
- bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ bool isAssociativeAndCommutative(const MachineInstr &Inst,
+ bool Invert) const override;
bool hasReassociableOperands(const MachineInstr &Inst,
const MachineBasicBlock *MBB) const override;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index a8d61acab1831..d26ac801fa2dc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -711,7 +711,7 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: ldurb w11, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: ldrb w8, [x8]
; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
-; CHECK-NOLSE-O1-NEXT: add w9, w10, w9
+; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -735,7 +735,7 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
; CHECK-LSE-O1-NEXT: ldrb w9, [x0, w1, sxtw]
; CHECK-LSE-O1-NEXT: ldurb w10, [x0, #-256]
; CHECK-LSE-O1-NEXT: add w8, w8, w10
-; CHECK-LSE-O1-NEXT: add w8, w9, w8
+; CHECK-LSE-O1-NEXT: add w8, w8, w9
; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: ldrb w9, [x9]
; CHECK-LSE-O1-NEXT: add w0, w8, w9
@@ -781,7 +781,7 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: ldurh w11, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: ldrh w8, [x8]
; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
-; CHECK-NOLSE-O1-NEXT: add w9, w10, w9
+; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -805,7 +805,7 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
; CHECK-LSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1]
; CHECK-LSE-O1-NEXT: ldurh w10, [x0, #-256]
; CHECK-LSE-O1-NEXT: add w8, w8, w10
-; CHECK-LSE-O1-NEXT: add w8, w9, w8
+; CHECK-LSE-O1-NEXT: add w8, w8, w9
; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: ldrh w9, [x9]
; CHECK-LSE-O1-NEXT: add w0, w8, w9
@@ -851,7 +851,7 @@ define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: ldur w11, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: ldr w8, [x8]
; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
-; CHECK-NOLSE-O1-NEXT: add w9, w10, w9
+; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -873,7 +873,7 @@ define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 {
; CHECK-LSE-O1-NEXT: ldr w9, [x0, w1, sxtw #2]
; CHECK-LSE-O1-NEXT: ldur w10, [x0, #-256]
; CHECK-LSE-O1-NEXT: add w8, w8, w10
-; CHECK-LSE-O1-NEXT: add w8, w9, w8
+; CHECK-LSE-O1-NEXT: add w8, w8, w9
; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: ldr w9, [x9]
; CHECK-LSE-O1-NEXT: add w0, w8, w9
@@ -917,7 +917,7 @@ define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 {
; CHECK-NOLSE-O1-NEXT: ldur x11, [x0, #-256]
; CHECK-NOLSE-O1-NEXT: ldr x8, [x8]
; CHECK-NOLSE-O1-NEXT: add x9, x9, x11
-; CHECK-NOLSE-O1-NEXT: add x9, x10, x9
+; CHECK-NOLSE-O1-NEXT: add x9, x9, x10
; CHECK-NOLSE-O1-NEXT: add x0, x9, x8
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -939,7 +939,7 @@ define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 {
; CHECK-LSE-O1-NEXT: ldr x9, [x0, w1, sxtw #3]
; CHECK-LSE-O1-NEXT: ldur x10, [x0, #-256]
; CHECK-LSE-O1-NEXT: add x8, x8, x10
-; CHECK-LSE-O1-NEXT: add x8, x9, x8
+; CHECK-LSE-O1-NEXT: add x8, x8, x9
; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936
; CHECK-LSE-O1-NEXT: ldr x9, [x9]
; CHECK-LSE-O1-NEXT: add x0, x8, x9
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 07d0f0616e192..647a9ac849f48 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -389,8 +389,8 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) {
; CHECK-NEXT: renamable $w10 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_regoff)
; CHECK-NEXT: renamable $w11 = LDURBBi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unscaled)
; CHECK-NEXT: renamable $w8 = LDRBBui killed renamable $x8, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_random)
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0
+ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
+ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
; CHECK-NEXT: RET undef $lr, implicit $w0
%ptr_unsigned = getelementptr i8, ptr %p, i32 4095
@@ -421,8 +421,8 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) {
; CHECK-NEXT: renamable $w10 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s16) from %ir.ptr_regoff)
; CHECK-NEXT: renamable $w11 = LDURHHi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unscaled)
; CHECK-NEXT: renamable $w8 = LDRHHui killed renamable $x8, 0, pcsections !0 :: (load unordered (s16) from %ir.ptr_random)
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0
+ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
+ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
; CHECK-NEXT: RET undef $lr, implicit $w0
%ptr_unsigned = getelementptr i16, ptr %p, i32 4095
@@ -453,8 +453,8 @@ define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) {
; CHECK-NEXT: renamable $w10 = LDRWroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s32) from %ir.ptr_regoff)
; CHECK-NEXT: renamable $w11 = LDURWi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s32) from %ir.ptr_unscaled)
; CHECK-NEXT: renamable $w8 = LDRWui killed renamable $x8, 0, pcsections !0 :: (load unordered (s32) from %ir.ptr_random)
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w10, killed renamable $w9, 0
+ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
+ ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
; CHECK-NEXT: RET undef $lr, implicit $w0
%ptr_unsigned = getelementptr i32, ptr %p, i32 4095
@@ -485,8 +485,8 @@ define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) {
; CHECK-NEXT: renamable $x10 = LDRXroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s64) from %ir.ptr_regoff)
; CHECK-NEXT: renamable $x11 = LDURXi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s64) from %ir.ptr_unscaled)
; CHECK-NEXT: renamable $x8 = LDRXui killed renamable $x8, 0, pcsections !0 :: (load unordered (s64) from %ir.ptr_random)
- ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x11, 0
- ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x10, killed renamable $x9, 0
+ ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x11, 0, pcsections !0
+ ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x10, 0, pcsections !0
; CHECK-NEXT: $x0 = ADDXrs killed renamable $x9, killed renamable $x8, 0, pcsections !0
; CHECK-NEXT: RET undef $lr, implicit $x0
%ptr_unsigned = getelementptr i64, ptr %p, i32 4095
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 6d7909070f6d9..2e2eb4ae84060 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -850,14 +850,14 @@ define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind {
; GISEL-NEXT: and x12, x8, #0xff00000000
; GISEL-NEXT: and x13, x9, #0xff0000000000
; GISEL-NEXT: orr x10, x11, x10
-; GISEL-NEXT: orr x11, x12, x13
+; GISEL-NEXT: orr x11, x13, x12
; GISEL-NEXT: and x12, x8, #0xff0000
; GISEL-NEXT: and x13, x9, #0xff000000
-; GISEL-NEXT: orr x12, x12, x13
+; GISEL-NEXT: orr x12, x13, x12
; GISEL-NEXT: and x8, x8, #0xff
-; GISEL-NEXT: orr x10, x10, x11
-; GISEL-NEXT: orr x8, x12, x8
-; GISEL-NEXT: orr x8, x10, x8
+; GISEL-NEXT: orr x10, x11, x10
+; GISEL-NEXT: orr x8, x8, x12
+; GISEL-NEXT: orr x8, x8, x10
; GISEL-NEXT: and x9, x9, #0xff00
; GISEL-NEXT: orr x0, x9, x8
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index 62430287e30a0..ea56d228ddc99 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -75,7 +75,7 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
+; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
@@ -94,8 +94,8 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-UNSAFE-LABEL: reassociate_adds3:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
+; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
+; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
@@ -114,8 +114,8 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-UNSAFE-LABEL: reassociate_adds4:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
+; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
+; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
@@ -174,8 +174,8 @@ define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; CHECK-UNSAFE-LABEL: reassociate_adds6:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
+; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
+; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
@@ -196,8 +196,8 @@ define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-UNSAFE-LABEL: reassociate_muls1:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
-; CHECK-UNSAFE-NEXT: fmul s1, s2, s3
-; CHECK-UNSAFE-NEXT: fmul s0, s0, s1
+; CHECK-UNSAFE-NEXT: fmul s1, s3, s2
+; CHECK-UNSAFE-NEXT: fmul s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv float %x0, %x1
%t1 = fmul float %x2, %t0
@@ -218,8 +218,8 @@ define double @reassociate_adds_double(double %x0, double %x1, double %x2, doubl
; CHECK-UNSAFE-LABEL: reassociate_adds_double:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
-; CHECK-UNSAFE-NEXT: fadd d1, d2, d3
-; CHECK-UNSAFE-NEXT: fadd d0, d0, d1
+; CHECK-UNSAFE-NEXT: fadd d1, d3, d2
+; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv double %x0, %x1
%t1 = fadd double %x2, %t0
@@ -240,8 +240,8 @@ define double @reassociate_muls_double(double %x0, double %x1, double %x2, doubl
; CHECK-UNSAFE-LABEL: reassociate_muls_double:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
-; CHECK-UNSAFE-NEXT: fmul d1, d2, d3
-; CHECK-UNSAFE-NEXT: fmul d0, d0, d1
+; CHECK-UNSAFE-NEXT: fmul d1, d3, d2
+; CHECK-UNSAFE-NEXT: fmul d0, d1, d0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv double %x0, %x1
%t1 = fmul double %x2, %t0
@@ -283,7 +283,7 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
@@ -302,8 +302,8 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
; CHECK-UNSAFE-LABEL: vector_reassociate_adds3:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
+; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %t0, %x2
@@ -322,8 +322,8 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
; CHECK-UNSAFE-LABEL: vector_reassociate_adds4:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
+; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
@@ -343,8 +343,8 @@ define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fmul v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-UNSAFE-NEXT: fmul v1.4s, v3.4s, v2.4s
+; CHECK-UNSAFE-NEXT: fmul v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fmul <4 x float> %x2, %t0
@@ -365,8 +365,8 @@ define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1,
; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.2d, v0.2d, v1.2d
-; CHECK-UNSAFE-NEXT: fmul v1.2d, v2.2d, v3.2d
-; CHECK-UNSAFE-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-UNSAFE-NEXT: fmul v1.2d, v3.2d, v2.2d
+; CHECK-UNSAFE-NEXT: fmul v0.2d, v1.2d, v0.2d
; CHECK-UNSAFE-NEXT: ret
%t0 = fadd <2 x double> %x0, %x1
%t1 = fmul <2 x double> %x2, %t0
diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
index 8c41ace73d7af..5eb1841e8f17d 100644
--- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
@@ -24,9 +24,9 @@ define float @fsqrt(float %a) #0 {
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fmul s2, s1, s1
-; CHECK-NEXT: fmul s1, s1, s0
+; CHECK-NEXT: fmul s1, s0, s1
; CHECK-NEXT: frsqrts s2, s0, s2
-; CHECK-NEXT: fmul s1, s2, s1
+; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fcsel s0, s0, s1, eq
; CHECK-NEXT: ret
%1 = tail call fast float @llvm.sqrt.f32(float %a)
@@ -47,9 +47,9 @@ define float @fsqrt_ieee_denorms(float %a) #1 {
; CHECK-NEXT: frsqrts s2, s0, s2
; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fmul s2, s1, s1
-; CHECK-NEXT: fmul s1, s1, s0
+; CHECK-NEXT: fmul s1, s0, s1
; CHECK-NEXT: frsqrts s2, s0, s2
-; CHECK-NEXT: fmul s1, s2, s1
+; CHECK-NEXT: fmul s1, s1, s2
; CHECK-NEXT: fcsel s0, s0, s1, eq
; CHECK-NEXT: ret
%1 = tail call fast float @llvm.sqrt.f32(float %a)
@@ -69,9 +69,9 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 {
; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT: fmul v2.2s, v1.2s, v1.2s
-; CHECK-NEXT: fmul v1.2s, v1.2s, v0.2s
+; CHECK-NEXT: fmul v1.2s, v0.2s, v1.2s
; CHECK-NEXT: frsqrts v2.2s, v0.2s, v2.2s
-; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s
+; CHECK-NEXT: fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT: fcmeq v2.2s, v0.2s, #0.0
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
@@ -92,9 +92,9 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 {
; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT: fmul v2.4s, v1.4s, v1.4s
-; CHECK-NEXT: fmul v1.4s, v1.4s, v0.4s
+; CHECK-NEXT: fmul v1.4s, v0.4s, v1.4s
; CHECK-NEXT: frsqrts v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: fmul v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
@@ -119,16 +119,16 @@ define <8 x float> @f8sqrt(<8 x float> %a) #0 {
; CHECK-NEXT: frsqrts v5.4s, v1.4s, v5.4s
; CHECK-NEXT: fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT: fmul v4.4s, v2.4s, v2.4s
-; CHECK-NEXT: fmul v2.4s, v2.4s, v0.4s
+; CHECK-NEXT: fmul v2.4s, v0.4s, v2.4s
; CHECK-NEXT: frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT: fmul v3.4s, v3.4s, v5.4s
; CHECK-NEXT: fmul v5.4s, v3.4s, v3.4s
-; CHECK-NEXT: fmul v3.4s, v3.4s, v1.4s
+; CHECK-NEXT: fmul v3.4s, v1.4s, v3.4s
; CHECK-NEXT: frsqrts v5.4s, v1.4s, v5.4s
-; CHECK-NEXT: fmul v2.4s, v4.4s, v2.4s
+; CHECK-NEXT: fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT: fcmeq v4.4s, v0.4s, #0.0
; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-NEXT: fmul v3.4s, v5.4s, v3.4s
+; CHECK-NEXT: fmul v3.4s, v3.4s, v5.4s
; CHECK-NEXT: fcmeq v5.4s, v1.4s, #0.0
; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT: ret
@@ -153,9 +153,9 @@ define double @dsqrt(double %a) #0 {
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
-; CHECK-NEXT: fmul d1, d1, d0
+; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: frsqrts d2, d0, d2
-; CHECK-NEXT: fmul d1, d2, d1
+; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fcsel d0, d0, d1, eq
; CHECK-NEXT: ret
%1 = tail call fast double @llvm.sqrt.f64(double %a)
@@ -179,9 +179,9 @@ define double @dsqrt_ieee_denorms(double %a) #1 {
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
-; CHECK-NEXT: fmul d1, d1, d0
+; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: frsqrts d2, d0, d2
-; CHECK-NEXT: fmul d1, d2, d1
+; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fcsel d0, d0, d1, eq
; CHECK-NEXT: ret
%1 = tail call fast double @llvm.sqrt.f64(double %a)
@@ -204,9 +204,9 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 {
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
-; CHECK-NEXT: fmul v1.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmul v1.2d, v0.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
-; CHECK-NEXT: fmul v1.2d, v2.2d, v1.2d
+; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: ret
@@ -237,16 +237,16 @@ define <4 x double> @d4sqrt(<4 x double> %a) #0 {
; CHECK-NEXT: frsqrts v5.2d, v1.2d, v5.2d
; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT: fmul v4.2d, v2.2d, v2.2d
-; CHECK-NEXT: fmul v2.2d, v2.2d, v0.2d
+; CHECK-NEXT: fmul v2.2d, v0.2d, v2.2d
; CHECK-NEXT: frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT: fmul v3.2d, v3.2d, v5.2d
; CHECK-NEXT: fmul v5.2d, v3.2d, v3.2d
-; CHECK-NEXT: fmul v3.2d, v3.2d, v1.2d
+; CHECK-NEXT: fmul v3.2d, v1.2d, v3.2d
; CHECK-NEXT: frsqrts v5.2d, v1.2d, v5.2d
-; CHECK-NEXT: fmul v2.2d, v4.2d, v2.2d
+; CHECK-NEXT: fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT: fcmeq v4.2d, v0.2d, #0.0
; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
-; CHECK-NEXT: fmul v3.2d, v5.2d, v3.2d
+; CHECK-NEXT: fmul v3.2d, v3.2d, v5.2d
; CHECK-NEXT: fcmeq v5.2d, v1.2d, #0.0
; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
index 1b9af71edb0c2..7c38dedd56ff6 100644
--- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -25,7 +25,7 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK: # %bb.0:
; CHECK: fadds [[REG0:[0-9]+]], 1, 2
; CHECK: fadds [[REG1:[0-9]+]], 3, 4
-; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK: fadds 1, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%t0 = fadd reassoc nsz float %x0, %x1
@@ -38,8 +38,8 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK: # %bb.0:
; CHECK: fadds [[REG0:[0-9]+]], 1, 2
-; CHECK: fadds [[REG1:[0-9]+]], 3, 4
-; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK: fadds [[REG1:[0-9]+]], 4, 3
+; CHECK: fadds 1, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%t0 = fadd reassoc nsz float %x0, %x1
@@ -52,8 +52,8 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK: # %bb.0:
; CHECK: fadds [[REG0:[0-9]+]], 1, 2
-; CHECK: fadds [[REG1:[0-9]+]], 3, 4
-; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK: fadds [[REG1:[0-9]+]], 4, 3
+; CHECK: fadds 1, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%t0 = fadd reassoc nsz float %x0, %x1
@@ -108,7 +108,7 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
; CHECK: # %bb.0:
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
-; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
@@ -121,8 +121,8 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK: # %bb.0:
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
-; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
-; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 37, 36
+; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
@@ -135,8 +135,8 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK: # %bb.0:
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
-; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
-; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 37, 36
+; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index e77c4abed7c66..8c5db3479f309 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -21,7 +21,7 @@ define double @test_reassoc_fadd2(double %a0, double %a1, double %a2, double %a3
; CHECK: # %bb.0:
; CHECK-NEXT: fadd.d ft0, fa0, fa1
; CHECK-NEXT: fadd.d ft1, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, ft0, ft1
+; CHECK-NEXT: fadd.d fa0, ft1, ft0
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %a2, %t0
@@ -33,8 +33,8 @@ define double @test_reassoc_fadd3(double %a0, double %a1, double %a2, double %a3
; CHECK-LABEL: test_reassoc_fadd3:
; CHECK: # %bb.0:
; CHECK-NEXT: fadd.d ft0, fa0, fa1
-; CHECK-NEXT: fadd.d ft1, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, ft0, ft1
+; CHECK-NEXT: fadd.d ft1, fa3, fa2
+; CHECK-NEXT: fadd.d fa0, ft1, ft0
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %t0, %a2
@@ -46,8 +46,8 @@ define double @test_reassoc_fadd4(double %a0, double %a1, double %a2, double %a3
; CHECK-LABEL: test_reassoc_fadd4:
; CHECK: # %bb.0:
; CHECK-NEXT: fadd.d ft0, fa0, fa1
-; CHECK-NEXT: fadd.d ft1, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, ft0, ft1
+; CHECK-NEXT: fadd.d ft1, fa3, fa2
+; CHECK-NEXT: fadd.d fa0, ft1, ft0
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %a2, %t0
@@ -73,7 +73,7 @@ define double @test_reassoc_fmul2(double %a0, double %a1, double %a2, double %a3
; CHECK: # %bb.0:
; CHECK-NEXT: fmul.d ft0, fa0, fa1
; CHECK-NEXT: fmul.d ft1, fa2, fa3
-; CHECK-NEXT: fmul.d fa0, ft0, ft1
+; CHECK-NEXT: fmul.d fa0, ft1, ft0
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %a2, %t0
@@ -85,8 +85,8 @@ define double @test_reassoc_fmul3(double %a0, double %a1, double %a2, double %a3
; CHECK-LABEL: test_reassoc_fmul3:
; CHECK: # %bb.0:
; CHECK-NEXT: fmul.d ft0, fa0, fa1
-; CHECK-NEXT: fmul.d ft1, fa2, fa3
-; CHECK-NEXT: fmul.d fa0, ft0, ft1
+; CHECK-NEXT: fmul.d ft1, fa3, fa2
+; CHECK-NEXT: fmul.d fa0, ft1, ft0
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %t0, %a2
@@ -98,8 +98,8 @@ define double @test_reassoc_fmul4(double %a0, double %a1, double %a2, double %a3
; CHECK-LABEL: test_reassoc_fmul4:
; CHECK: # %bb.0:
; CHECK-NEXT: fmul.d ft0, fa0, fa1
-; CHECK-NEXT: fmul.d ft1, fa2, fa3
-; CHECK-NEXT: fmul.d fa0, ft0, ft1
+; CHECK-NEXT: fmul.d ft1, fa3, fa2
+; CHECK-NEXT: fmul.d fa0, ft1, ft0
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %a2, %t0
@@ -135,7 +135,7 @@ define double @test_reassoc_big2(double %a0, double %a1, i32 %a2, double %a3, i3
; CHECK-NEXT: fmul.d ft1, ft1, fa1
; CHECK-NEXT: fadd.d ft2, fa0, fa1
; CHECK-NEXT: fadd.d ft3, fa2, fa1
-; CHECK-NEXT: fmul.d ft0, ft0, ft1
+; CHECK-NEXT: fmul.d ft0, ft1, ft0
; CHECK-NEXT: fadd.d ft1, fa2, ft2
; CHECK-NEXT: fmul.d ft2, fa0, ft3
; CHECK-NEXT: fsub.d ft1, fa3, ft1
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 81cfa94d8b1f1..f60eec9b9e3ea 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1243,8 +1243,8 @@ define <64 x i8> @test16(i64 %x) {
; X86-NEXT: kmovd %eax, %k2
; X86-NEXT: kshiftlq $63, %k2, %k2
; X86-NEXT: kshiftrq $58, %k2, %k2
-; X86-NEXT: korq %k2, %k1, %k1
-; X86-NEXT: korq %k1, %k0, %k0
+; X86-NEXT: korq %k1, %k2, %k1
+; X86-NEXT: korq %k0, %k1, %k0
; X86-NEXT: vpmovm2b %k0, %zmm0
; X86-NEXT: retl
%a = bitcast i64 %x to <64 x i1>
@@ -1364,8 +1364,8 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
; X86-NEXT: kmovd %eax, %k2
; X86-NEXT: kshiftlq $63, %k2, %k2
; X86-NEXT: kshiftrq $58, %k2, %k2
-; X86-NEXT: korq %k2, %k1, %k1
-; X86-NEXT: korq %k1, %k0, %k0
+; X86-NEXT: korq %k1, %k2, %k1
+; X86-NEXT: korq %k0, %k1, %k0
; X86-NEXT: vpmovm2b %k0, %zmm0
; X86-NEXT: retl
%a = bitcast i64 %x to <64 x i1>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
index 6a46a6868fcc7..2654a300e2052 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -53,8 +53,8 @@ define <32 x half> @test_sqrt_ph_512_fast_estimate_attribute_2(<32 x half> %a0,
; CHECK-NEXT: vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT: vfmadd213ph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm0
; CHECK-NEXT: vmulph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm2, %zmm2
-; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vmulph %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vmulph %zmm2, %zmm0, %zmm0
; CHECK-NEXT: retq
%1 = call fast <32 x half> @llvm.sqrt.v32f16(<32 x half> %a0)
%2 = fdiv fast <32 x half> %a1, %1
diff --git a/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll b/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
index d8ab0de2ca97b..ca193d84148bf 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
@@ -26,7 +26,7 @@ define half @reassociate_adds2(half %x0, half %x1, half %x2, half %x3) {
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fadd reassoc nsz half %x0, %x1
%t1 = fadd reassoc nsz half %x2, %t0
@@ -38,8 +38,8 @@ define half @reassociate_adds3(half %x0, half %x1, half %x2, half %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddsh %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fadd reassoc nsz half %x0, %x1
%t1 = fadd reassoc nsz half %t0, %x2
@@ -51,8 +51,8 @@ define half @reassociate_adds4(half %x0, half %x1, half %x2, half %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddsh %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fadd reassoc nsz half %x0, %x1
%t1 = fadd reassoc nsz half %x2, %t0
@@ -93,8 +93,8 @@ define half @reassociate_adds6(half %x0, half %x1, half %x2, half %x3) {
; CHECK-LABEL: reassociate_adds6:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddsh %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fdiv reassoc nsz half %x0, %x1
%t1 = fadd reassoc nsz half %x2, %t0
@@ -108,8 +108,8 @@ define half @reassociate_muls1(half %x0, half %x1, half %x2, half %x3) {
; CHECK-LABEL: reassociate_muls1:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vmulsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmulsh %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vmulsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fdiv reassoc nsz half %x0, %x1
%t1 = fmul reassoc nsz half %x2, %t0
@@ -123,8 +123,8 @@ define <8 x half> @reassociate_adds_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x h
; CHECK-LABEL: reassociate_adds_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivph %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddph %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddph %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fdiv reassoc nsz <8 x half> %x0, %x1
%t1 = fadd reassoc nsz <8 x half> %x2, %t0
@@ -138,8 +138,8 @@ define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x h
; CHECK-LABEL: reassociate_muls_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vmulph %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vmulph %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmulph %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fadd reassoc nsz <8 x half> %x0, %x1
%t1 = fmul reassoc nsz <8 x half> %x2, %t0
@@ -153,8 +153,8 @@ define <16 x half> @reassociate_adds_v16f16(<16 x half> %x0, <16 x half> %x1, <1
; CHECK-LABEL: reassociate_adds_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivph %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vaddph %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddph %ymm2, %ymm3, %ymm1
+; CHECK-NEXT: vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%t0 = fdiv reassoc nsz <16 x half> %x0, %x1
%t1 = fadd reassoc nsz <16 x half> %x2, %t0
@@ -168,8 +168,8 @@ define <16 x half> @reassociate_muls_v16f16(<16 x half> %x0, <16 x half> %x1, <1
; CHECK-LABEL: reassociate_muls_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vmulph %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vmulph %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vmulph %ymm2, %ymm3, %ymm1
+; CHECK-NEXT: vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%t0 = fadd reassoc nsz <16 x half> %x0, %x1
%t1 = fmul reassoc nsz <16 x half> %x2, %t0
@@ -183,8 +183,8 @@ define <32 x half> @reassociate_adds_v32f16(<32 x half> %x0, <32 x half> %x1, <3
; CHECK-LABEL: reassociate_adds_v32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivph %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vaddph %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddph %zmm2, %zmm3, %zmm1
+; CHECK-NEXT: vaddph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%t0 = fdiv reassoc nsz <32 x half> %x0, %x1
%t1 = fadd reassoc nsz <32 x half> %x2, %t0
@@ -198,8 +198,8 @@ define <32 x half> @reassociate_muls_v32f16(<32 x half> %x0, <32 x half> %x1, <3
; CHECK-LABEL: reassociate_muls_v32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vmulph %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vmulph %zmm2, %zmm3, %zmm1
+; CHECK-NEXT: vmulph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%t0 = fadd reassoc nsz <32 x half> %x0, %x1
%t1 = fmul reassoc nsz <32 x half> %x2, %t0
@@ -213,8 +213,8 @@ define half @reassociate_mins_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-LABEL: reassociate_mins_half:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vminsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vminsh %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vminsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fdiv half %x0, %x1
%cmp1 = fcmp olt half %x2, %t0
@@ -230,8 +230,8 @@ define half @reassociate_maxs_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-LABEL: reassociate_maxs_half:
; CHECK: # %bb.0:
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vmaxsh %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmaxsh %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fdiv half %x0, %x1
%cmp1 = fcmp ogt half %x2, %t0
@@ -247,8 +247,8 @@ define <8 x half> @reassociate_mins_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x h
; CHECK-LABEL: reassociate_mins_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vminph %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vminph %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vminph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fadd <8 x half> %x0, %x1
%cmp1 = fcmp olt <8 x half> %x2, %t0
@@ -264,8 +264,8 @@ define <8 x half> @reassociate_maxs_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x h
; CHECK-LABEL: reassociate_maxs_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vmaxph %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmaxph %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vmaxph %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
%t0 = fadd <8 x half> %x0, %x1
%cmp1 = fcmp ogt <8 x half> %x2, %t0
@@ -281,8 +281,8 @@ define <16 x half> @reassociate_mins_v16f16(<16 x half> %x0, <16 x half> %x1, <1
; CHECK-LABEL: reassociate_mins_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vminph %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vminph %ymm2, %ymm3, %ymm1
+; CHECK-NEXT: vminph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%t0 = fadd <16 x half> %x0, %x1
%cmp1 = fcmp olt <16 x half> %x2, %t0
@@ -298,8 +298,8 @@ define <16 x half> @reassociate_maxs_v16f16(<16 x half> %x0, <16 x half> %x1, <1
; CHECK-LABEL: reassociate_maxs_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: vmaxph %ymm3, %ymm2, %ymm1
-; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vmaxph %ymm2, %ymm3, %ymm1
+; CHECK-NEXT: vmaxph %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
%t0 = fadd <16 x half> %x0, %x1
%cmp1 = fcmp ogt <16 x half> %x2, %t0
@@ -315,8 +315,8 @@ define <32 x half> @reassociate_mins_v32f16(<32 x half> %x0, <32 x half> %x1, <3
; CHECK-LABEL: reassociate_mins_v32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vminph %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vminph %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vminph %zmm2, %zmm3, %zmm1
+; CHECK-NEXT: vminph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%t0 = fadd <32 x half> %x0, %x1
%cmp1 = fcmp olt <32 x half> %x2, %t0
@@ -332,8 +332,8 @@ define <32 x half> @reassociate_maxs_v16f32(<32 x half> %x0, <32 x half> %x1, <3
; CHECK-LABEL: reassociate_maxs_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vmaxph %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vmaxph %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vmaxph %zmm2, %zmm3, %zmm1
+; CHECK-NEXT: vmaxph %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%t0 = fadd <32 x half> %x0, %x1
%cmp1 = fcmp ogt <32 x half> %x2, %t0
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mscatter.ll b/llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
index 9f81f36a011de..5d7c468351ac2 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mscatter.ll
@@ -7,11 +7,11 @@ define void @test_mscatter_v16f16(ptr %base, <16 x i32> %index, <16 x half> %val
; CHECK-NEXT: vpbroadcastq %rdi, %zmm3
; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vpmovsxdq %ymm2, %zmm2
-; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm4
-; CHECK-NEXT: vpaddq %zmm4, %zmm2, %zmm2
+; CHECK-NEXT: vpaddq %zmm2, %zmm3, %zmm4
+; CHECK-NEXT: vpaddq %zmm2, %zmm4, %zmm2
; CHECK-NEXT: vpmovsxdq %ymm0, %zmm0
-; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm3
-; CHECK-NEXT: vpaddq %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm3
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: vmovq %xmm0, %rax
; CHECK-NEXT: vmovsh %xmm1, (%rax)
; CHECK-NEXT: vpsrld $16, %xmm1, %xmm3
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 468e02d8884ad..e274557534774 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -4358,8 +4358,8 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
; X86-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
; X86-NEXT: vcvtps2ph $10, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x0a]
; X86-NEXT: vcvtps2ph $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x0b]
-; X86-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc3]
+; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtps2ph_128:
@@ -4368,8 +4368,8 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
; X64-NEXT: vcvtps2ph $2, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc2,0x02]
; X64-NEXT: vcvtps2ph $10, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc3,0x0a]
; X64-NEXT: vcvtps2ph $11, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x0b]
-; X64-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc3]
+; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
%res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 10, <8 x i16> zeroinitializer, i8 %mask)
@@ -4389,8 +4389,8 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
; X86-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
; X86-NEXT: vcvtps2ph $11, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x0b]
; X86-NEXT: vcvtps2ph $12, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x0c]
-; X86-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc3]
+; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc2]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -4400,8 +4400,8 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
; X64-NEXT: vcvtps2ph $2, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc2,0x02]
; X64-NEXT: vcvtps2ph $11, %ymm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc3,0x0b]
; X64-NEXT: vcvtps2ph $12, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x0c]
-; X64-NEXT: vpaddw %xmm1, %xmm3, %xmm0 # encoding: [0xc5,0xe1,0xfd,0xc1]
-; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # encoding: [0xc5,0xe9,0xfd,0xc0]
+; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xfd,0xc3]
+; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc2]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
diff --git a/llvm/test/CodeGen/X86/horizontal-sum.ll b/llvm/test/CodeGen/X86/horizontal-sum.ll
index 3565165dc863a..542adaedd5600 100644
--- a/llvm/test/CodeGen/X86/horizontal-sum.ll
+++ b/llvm/test/CodeGen/X86/horizontal-sum.ll
@@ -703,14 +703,14 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
; AVX1-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
; AVX1-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
-; AVX1-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[0,0,0,0]
; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,2,2,2]
-; AVX1-SLOW-NEXT: vpaddd %xmm4, %xmm2, %xmm2
-; AVX1-SLOW-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm4, %xmm2
+; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm3, %xmm1
; AVX1-SLOW-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; AVX1-SLOW-NEXT: retq
@@ -727,14 +727,14 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
; AVX1-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0]
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
; AVX1-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
-; AVX1-FAST-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-FAST-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-FAST-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-FAST-NEXT: vphaddd %xmm3, %xmm3, %xmm1
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; AVX1-FAST-NEXT: vpaddd %xmm3, %xmm1, %xmm1
-; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm3, %xmm1
+; AVX1-FAST-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; AVX1-FAST-NEXT: retq
;
@@ -759,8 +759,8 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,2,2,2]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3]
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3]
-; AVX2-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX2-SLOW-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; AVX2-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: sequential_sum_v4i32_v4i32:
@@ -782,8 +782,8 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
; AVX2-FAST-NEXT: vpbroadcastd %xmm4, %xmm4
; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3]
-; AVX2-FAST-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX2-FAST-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; AVX2-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX2-FAST-NEXT: retq
%5 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 0, i32 4>
%6 = shufflevector <4 x i32> %0, <4 x i32> %1, <2 x i32> <i32 1, i32 5>
diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
index f02b4d1c7726d..d65bf782d7994 100644
--- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -16,8 +16,8 @@ define <4 x i32> @reassociate_and_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; AVX2-LABEL: reassociate_and_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm3, %xmm1
+; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_and_v4i32:
@@ -43,8 +43,8 @@ define <4 x i32> @reassociate_or_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %
; AVX2-LABEL: reassociate_or_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm2, %xmm3, %xmm1
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_or_v4i32:
@@ -70,8 +70,8 @@ define <4 x i32> @reassociate_xor_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; AVX2-LABEL: reassociate_xor_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm3, %xmm1
+; AVX2-NEXT: vpxor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_xor_v4i32:
@@ -102,8 +102,8 @@ define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; AVX2-LABEL: reassociate_and_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm1
+; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_and_v8i32:
@@ -132,8 +132,8 @@ define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %
; AVX2-LABEL: reassociate_or_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm1
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_or_v8i32:
@@ -162,8 +162,8 @@ define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; AVX2-LABEL: reassociate_xor_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm3, %ymm1
+; AVX2-NEXT: vpxor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_xor_v8i32:
@@ -201,11 +201,11 @@ define <16 x i32> @reassociate_and_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX2-LABEL: reassociate_and_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpand %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_and_v16i32:
@@ -240,11 +240,11 @@ define <16 x i32> @reassociate_or_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i
; AVX2-LABEL: reassociate_or_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpor %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpor %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_or_v16i32:
@@ -279,11 +279,11 @@ define <16 x i32> @reassociate_xor_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX2-LABEL: reassociate_xor_v16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpxor %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpxor %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpxor %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpxor %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_xor_v16i32:
@@ -311,8 +311,8 @@ define <16 x i8> @reassociate_umax_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
; AVX-LABEL: reassociate_umax_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxub %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxub %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpmaxub %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <16 x i8> %x0, %x1
@@ -336,8 +336,8 @@ define <8 x i16> @reassociate_umax_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
; AVX-LABEL: reassociate_umax_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxuw %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxuw %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpmaxuw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <8 x i16> %x0, %x1
@@ -374,8 +374,8 @@ define <4 x i32> @reassociate_umax_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; AVX-LABEL: reassociate_umax_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxud %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxud %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpmaxud %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <4 x i32> %x0, %x1
@@ -439,8 +439,8 @@ define <2 x i64> @reassociate_umax_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
; AVX512-LABEL: reassociate_umax_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxuq %xmm3, %xmm2, %xmm1
-; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmaxuq %xmm2, %xmm3, %xmm1
+; AVX512-NEXT: vpmaxuq %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%t0 = add <2 x i64> %x0, %x1
@@ -470,8 +470,8 @@ define <16 x i8> @reassociate_smax_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
; AVX-LABEL: reassociate_smax_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxsb %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxsb %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpmaxsb %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <16 x i8> %x0, %x1
@@ -493,8 +493,8 @@ define <8 x i16> @reassociate_smax_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
; AVX-LABEL: reassociate_smax_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxsw %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxsw %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpmaxsw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <8 x i16> %x0, %x1
@@ -524,8 +524,8 @@ define <4 x i32> @reassociate_smax_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; AVX-LABEL: reassociate_smax_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmaxsd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmaxsd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpmaxsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <4 x i32> %x0, %x1
@@ -584,8 +584,8 @@ define <2 x i64> @reassociate_smax_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
; AVX512-LABEL: reassociate_smax_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsq %xmm3, %xmm2, %xmm1
-; AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmaxsq %xmm2, %xmm3, %xmm1
+; AVX512-NEXT: vpmaxsq %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%t0 = add <2 x i64> %x0, %x1
@@ -607,8 +607,8 @@ define <16 x i8> @reassociate_umin_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
; AVX-LABEL: reassociate_umin_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminub %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminub %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpminub %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <16 x i8> %x0, %x1
@@ -635,8 +635,8 @@ define <8 x i16> @reassociate_umin_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
; AVX-LABEL: reassociate_umin_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminuw %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminuw %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpminuw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <8 x i16> %x0, %x1
@@ -672,8 +672,8 @@ define <4 x i32> @reassociate_umin_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; AVX-LABEL: reassociate_umin_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminud %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminud %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpminud %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <4 x i32> %x0, %x1
@@ -737,8 +737,8 @@ define <2 x i64> @reassociate_umin_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
; AVX512-LABEL: reassociate_umin_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpminuq %xmm3, %xmm2, %xmm1
-; AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpminuq %xmm2, %xmm3, %xmm1
+; AVX512-NEXT: vpminuq %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%t0 = add <2 x i64> %x0, %x1
@@ -768,8 +768,8 @@ define <16 x i8> @reassociate_smin_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>
; AVX-LABEL: reassociate_smin_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminsb %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminsb %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpminsb %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <16 x i8> %x0, %x1
@@ -791,8 +791,8 @@ define <8 x i16> @reassociate_smin_v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>
; AVX-LABEL: reassociate_smin_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminsw %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminsw %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpminsw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <8 x i16> %x0, %x1
@@ -822,8 +822,8 @@ define <4 x i32> @reassociate_smin_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; AVX-LABEL: reassociate_smin_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpminsd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpminsd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vpminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = add <4 x i32> %x0, %x1
@@ -882,8 +882,8 @@ define <2 x i64> @reassociate_smin_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>
; AVX512-LABEL: reassociate_smin_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpminsq %xmm3, %xmm2, %xmm1
-; AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpminsq %xmm2, %xmm3, %xmm1
+; AVX512-NEXT: vpminsq %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%t0 = add <2 x i64> %x0, %x1
@@ -910,8 +910,8 @@ define <32 x i8> @reassociate_umax_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
; AVX-LABEL: reassociate_umax_v32i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpmaxub %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmaxub %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpmaxub %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <32 x i8> %x0, %x1
@@ -940,8 +940,8 @@ define <16 x i16> @reassociate_umax_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
; AVX-LABEL: reassociate_umax_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmaxuw %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpmaxuw %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <16 x i16> %x0, %x1
@@ -995,8 +995,8 @@ define <8 x i32> @reassociate_umax_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; AVX-LABEL: reassociate_umax_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpmaxud %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmaxud %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpmaxud %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <8 x i32> %x0, %x1
@@ -1091,8 +1091,8 @@ define <4 x i64> @reassociate_umax_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
; AVX512-LABEL: reassociate_umax_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpmaxuq %ymm3, %ymm2, %ymm1
-; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxuq %ymm2, %ymm3, %ymm1
+; AVX512-NEXT: vpmaxuq %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%t0 = add <4 x i64> %x0, %x1
@@ -1133,8 +1133,8 @@ define <32 x i8> @reassociate_smax_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
; AVX-LABEL: reassociate_smax_v32i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpmaxsb %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmaxsb %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpmaxsb %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <32 x i8> %x0, %x1
@@ -1159,8 +1159,8 @@ define <16 x i16> @reassociate_smax_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
; AVX-LABEL: reassociate_smax_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpmaxsw %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmaxsw %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpmaxsw %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <16 x i16> %x0, %x1
@@ -1201,8 +1201,8 @@ define <8 x i32> @reassociate_smax_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; AVX-LABEL: reassociate_smax_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpmaxsd %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpmaxsd %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpmaxsd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <8 x i32> %x0, %x1
@@ -1292,8 +1292,8 @@ define <4 x i64> @reassociate_smax_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
; AVX512-LABEL: reassociate_smax_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpmaxsq %ymm3, %ymm2, %ymm1
-; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpmaxsq %ymm2, %ymm3, %ymm1
+; AVX512-NEXT: vpmaxsq %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%t0 = add <4 x i64> %x0, %x1
@@ -1318,8 +1318,8 @@ define <32 x i8> @reassociate_umin_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
; AVX-LABEL: reassociate_umin_v32i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpminub %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpminub %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpminub %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <32 x i8> %x0, %x1
@@ -1354,8 +1354,8 @@ define <16 x i16> @reassociate_umin_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
; AVX-LABEL: reassociate_umin_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpminuw %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpminuw %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpminuw %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <16 x i16> %x0, %x1
@@ -1408,8 +1408,8 @@ define <8 x i32> @reassociate_umin_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; AVX-LABEL: reassociate_umin_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpminud %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpminud %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpminud %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <8 x i32> %x0, %x1
@@ -1504,8 +1504,8 @@ define <4 x i64> @reassociate_umin_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
; AVX512-LABEL: reassociate_umin_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpminuq %ymm3, %ymm2, %ymm1
-; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminuq %ymm2, %ymm3, %ymm1
+; AVX512-NEXT: vpminuq %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%t0 = add <4 x i64> %x0, %x1
@@ -1546,8 +1546,8 @@ define <32 x i8> @reassociate_smin_v32i8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>
; AVX-LABEL: reassociate_smin_v32i8:
; AVX: # %bb.0:
; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpminsb %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpminsb %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpminsb %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <32 x i8> %x0, %x1
@@ -1572,8 +1572,8 @@ define <16 x i16> @reassociate_smin_v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x
; AVX-LABEL: reassociate_smin_v16i16:
; AVX: # %bb.0:
; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpminsw %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpminsw %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpminsw %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <16 x i16> %x0, %x1
@@ -1614,8 +1614,8 @@ define <8 x i32> @reassociate_smin_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; AVX-LABEL: reassociate_smin_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vpminsd %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpminsd %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vpminsd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = add <8 x i32> %x0, %x1
@@ -1705,8 +1705,8 @@ define <4 x i64> @reassociate_smin_v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>
; AVX512-LABEL: reassociate_smin_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpminsq %ymm3, %ymm2, %ymm1
-; AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpminsq %ymm2, %ymm3, %ymm1
+; AVX512-NEXT: vpminsq %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
%t0 = add <4 x i64> %x0, %x1
@@ -1740,17 +1740,17 @@ define <64 x i8> @reassociate_umax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpmaxub %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmaxub %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpmaxub %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxub %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpmaxub %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmaxub %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpmaxub %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_umax_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxub %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxub %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxub %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <64 x i8> %x0, %x1
@@ -1798,17 +1798,17 @@ define <32 x i16> @reassociate_umax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpmaxuw %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpmaxuw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmaxuw %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpmaxuw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxuw %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpmaxuw %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmaxuw %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_umax_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxuw %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxuw %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxuw %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <32 x i16> %x0, %x1
@@ -1907,17 +1907,17 @@ define <16 x i32> @reassociate_umax_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpmaxud %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmaxud %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpmaxud %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxud %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpmaxud %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmaxud %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpmaxud %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_umax_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxud %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxud %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxud %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <16 x i32> %x0, %x1
@@ -2091,8 +2091,8 @@ define <8 x i64> @reassociate_umax_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
; AVX512-LABEL: reassociate_umax_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxuq %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxuq %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxuq %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <8 x i64> %x0, %x1
@@ -2164,17 +2164,17 @@ define <64 x i8> @reassociate_smax_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpmaxsb %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmaxsb %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpmaxsb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxsb %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpmaxsb %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmaxsb %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_smax_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxsb %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsb %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxsb %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <64 x i8> %x0, %x1
@@ -2206,17 +2206,17 @@ define <32 x i16> @reassociate_smax_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpmaxsw %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmaxsw %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxsw %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpmaxsw %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmaxsw %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_smax_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxsw %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsw %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxsw %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <32 x i16> %x0, %x1
@@ -2288,17 +2288,17 @@ define <16 x i32> @reassociate_smax_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpmaxsd %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpmaxsd %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmaxsd %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpmaxsd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmaxsd %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_smax_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxsd %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsd %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxsd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <16 x i32> %x0, %x1
@@ -2463,8 +2463,8 @@ define <8 x i64> @reassociate_smax_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
; AVX512-LABEL: reassociate_smax_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpmaxsq %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmaxsq %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpmaxsq %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <8 x i64> %x0, %x1
@@ -2496,17 +2496,17 @@ define <64 x i8> @reassociate_umin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpminub %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpminub %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpminub %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpminub %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpminub %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpminub %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpminub %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_umin_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminub %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminub %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminub %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <64 x i8> %x0, %x1
@@ -2566,17 +2566,17 @@ define <32 x i16> @reassociate_umin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpminuw %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpminuw %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpminuw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpminuw %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpminuw %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpminuw %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpminuw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_umin_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminuw %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminuw %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminuw %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <32 x i16> %x0, %x1
@@ -2672,17 +2672,17 @@ define <16 x i32> @reassociate_umin_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpminud %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpminud %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpminud %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpminud %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpminud %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_umin_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminud %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminud %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminud %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <16 x i32> %x0, %x1
@@ -2856,8 +2856,8 @@ define <8 x i64> @reassociate_umin_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
; AVX512-LABEL: reassociate_umin_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminuq %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminuq %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminuq %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <8 x i64> %x0, %x1
@@ -2929,17 +2929,17 @@ define <64 x i8> @reassociate_smin_v64i8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8>
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpminsb %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpminsb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpminsb %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpminsb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpminsb %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpminsb %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpminsb %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpminsb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_smin_v64i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminsb %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsb %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminsb %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <64 x i8> %x0, %x1
@@ -2971,17 +2971,17 @@ define <32 x i16> @reassociate_smin_v32i16(<32 x i16> %x0, <32 x i16> %x1, <32 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpminsw %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpminsw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpminsw %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpminsw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpminsw %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpminsw %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpminsw %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpminsw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_smin_v32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminsw %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsw %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminsw %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <32 x i16> %x0, %x1
@@ -3053,17 +3053,17 @@ define <16 x i32> @reassociate_smin_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpminsd %ymm6, %ymm4, %ymm2
-; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpminsd %ymm7, %ymm5, %ymm2
-; AVX2-NEXT: vpminsd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpminsd %ymm4, %ymm6, %ymm2
+; AVX2-NEXT: vpminsd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpminsd %ymm5, %ymm7, %ymm2
+; AVX2-NEXT: vpminsd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: reassociate_smin_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminsd %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsd %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminsd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <16 x i32> %x0, %x1
@@ -3228,8 +3228,8 @@ define <8 x i64> @reassociate_smin_v8i64(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>
; AVX512-LABEL: reassociate_smin_v8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpminsq %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpminsq %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vpminsq %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = add <8 x i64> %x0, %x1
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index 07368e3d5645b..d7e2ebd2cccfd 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -44,7 +44,7 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %x2, %t0
@@ -63,8 +63,8 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; AVX-LABEL: reassociate_adds3:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %t0, %x2
@@ -83,8 +83,8 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; AVX-LABEL: reassociate_adds4:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %x2, %t0
@@ -143,8 +143,8 @@ define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; AVX-LABEL: reassociate_adds6:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv reassoc nsz float %x0, %x1
%t1 = fadd reassoc nsz float %x2, %t0
@@ -165,8 +165,8 @@ define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; AVX-LABEL: reassociate_muls1:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv reassoc nsz float %x0, %x1
%t1 = fmul reassoc nsz float %x2, %t0
@@ -187,8 +187,8 @@ define double @reassociate_adds_double(double %x0, double %x1, double %x2, doubl
; AVX-LABEL: reassociate_adds_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv reassoc nsz double %x0, %x1
%t1 = fadd reassoc nsz double %x2, %t0
@@ -209,8 +209,8 @@ define double @reassociate_muls_double(double %x0, double %x1, double %x2, doubl
; AVX-LABEL: reassociate_muls_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulsd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv reassoc nsz double %x0, %x1
%t1 = fmul reassoc nsz double %x2, %t0
@@ -231,8 +231,8 @@ define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; AVX1-LABEL: reassociate_adds_v4f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vaddps %xmm3, %xmm2, %xmm1
-; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vaddps %xmm2, %xmm3, %xmm1
+; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v4f32:
@@ -259,8 +259,8 @@ define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1,
; AVX1-LABEL: reassociate_adds_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vaddpd %xmm3, %xmm2, %xmm1
-; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vaddpd %xmm2, %xmm3, %xmm1
+; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v2f64:
@@ -287,8 +287,8 @@ define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; AVX-LABEL: reassociate_muls_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulps %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz <4 x float> %x0, %x1
%t1 = fmul reassoc nsz <4 x float> %x2, %t0
@@ -309,8 +309,8 @@ define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1,
; AVX-LABEL: reassociate_muls_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulpd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmulpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz <2 x double> %x0, %x1
%t1 = fmul reassoc nsz <2 x double> %x2, %t0
@@ -334,8 +334,8 @@ define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8
; AVX1-LABEL: reassociate_adds_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vaddps %ymm3, %ymm2, %ymm1
-; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vaddps %ymm2, %ymm3, %ymm1
+; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v8f32:
@@ -365,8 +365,8 @@ define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1,
; AVX1-LABEL: reassociate_adds_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vaddpd %ymm3, %ymm2, %ymm1
-; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vaddpd %ymm2, %ymm3, %ymm1
+; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v4f64:
@@ -396,8 +396,8 @@ define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8
; AVX-LABEL: reassociate_muls_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmulps %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vmulps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz <8 x float> %x0, %x1
%t1 = fmul reassoc nsz <8 x float> %x2, %t0
@@ -421,8 +421,8 @@ define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1,
; AVX-LABEL: reassociate_muls_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmulpd %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vmulpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = fadd reassoc nsz <4 x double> %x0, %x1
%t1 = fmul reassoc nsz <4 x double> %x2, %t0
@@ -453,10 +453,10 @@ define <16 x float> @reassociate_adds_v16f32(<16 x float> %x0, <16 x float> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmulps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vaddps %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vaddps %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vaddps %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vaddps %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vaddps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v16f32:
@@ -493,10 +493,10 @@ define <8 x double> @reassociate_adds_v8f64(<8 x double> %x0, <8 x double> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmulpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vaddpd %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vaddpd %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vaddpd %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vaddpd %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vaddpd %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v8f64:
@@ -533,17 +533,17 @@ define <16 x float> @reassociate_muls_v16f32(<16 x float> %x0, <16 x float> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vmulps %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vmulps %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vmulps %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vmulps %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vmulps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_muls_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmulps %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmulps %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vmulps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = fadd reassoc nsz <16 x float> %x0, %x1
%t1 = fmul reassoc nsz <16 x float> %x2, %t0
@@ -574,17 +574,17 @@ define <8 x double> @reassociate_muls_v8f64(<8 x double> %x0, <8 x double> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vmulpd %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vmulpd %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vmulpd %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vmulpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vmulpd %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vmulpd %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vmulpd %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_muls_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmulpd %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmulpd %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = fadd reassoc nsz <8 x double> %x0, %x1
%t1 = fmul reassoc nsz <8 x double> %x2, %t0
@@ -605,8 +605,8 @@ define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3
; AVX-LABEL: reassociate_mins_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminss %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%cmp1 = fcmp olt float %x2, %t0
@@ -629,8 +629,8 @@ define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3
; AVX-LABEL: reassociate_maxs_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxss %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%cmp1 = fcmp ogt float %x2, %t0
@@ -653,8 +653,8 @@ define double @reassociate_mins_double(double %x0, double %x1, double %x2, doubl
; AVX-LABEL: reassociate_mins_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminsd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vminsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%cmp1 = fcmp olt double %x2, %t0
@@ -677,8 +677,8 @@ define double @reassociate_maxs_double(double %x0, double %x1, double %x2, doubl
; AVX-LABEL: reassociate_maxs_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxsd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%cmp1 = fcmp ogt double %x2, %t0
@@ -701,8 +701,8 @@ define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; AVX-LABEL: reassociate_mins_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vminps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%cmp1 = fcmp olt <4 x float> %x2, %t0
@@ -725,8 +725,8 @@ define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%cmp1 = fcmp ogt <4 x float> %x2, %t0
@@ -749,8 +749,8 @@ define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1,
; AVX-LABEL: reassociate_mins_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminpd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%cmp1 = fcmp olt <2 x double> %x2, %t0
@@ -773,8 +773,8 @@ define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1,
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %xmm2, %xmm3, %xmm1
+; AVX-NEXT: vmaxpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%cmp1 = fcmp ogt <2 x double> %x2, %t0
@@ -800,8 +800,8 @@ define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8
; AVX-LABEL: reassociate_mins_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vminps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%cmp1 = fcmp olt <8 x float> %x2, %t0
@@ -827,8 +827,8 @@ define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vmaxps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%cmp1 = fcmp ogt <8 x float> %x2, %t0
@@ -854,8 +854,8 @@ define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1,
; AVX-LABEL: reassociate_mins_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminpd %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vminpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%cmp1 = fcmp olt <4 x double> %x2, %t0
@@ -881,8 +881,8 @@ define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1,
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %ymm2, %ymm3, %ymm1
+; AVX-NEXT: vmaxpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%cmp1 = fcmp ogt <4 x double> %x2, %t0
@@ -915,17 +915,17 @@ define <16 x float> @reassociate_mins_v16f32(<16 x float> %x0, <16 x float> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vminps %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vminps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vminps %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vminps %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vminps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vminps %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vminps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_mins_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vminps %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminps %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vminps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = fadd <16 x float> %x0, %x1
%cmp1 = fcmp olt <16 x float> %x2, %t0
@@ -958,17 +958,17 @@ define <16 x float> @reassociate_maxs_v16f32(<16 x float> %x0, <16 x float> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vmaxps %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vmaxps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vmaxps %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vmaxps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vmaxps %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vmaxps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vmaxps %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vmaxps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_maxs_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmaxps %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxps %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vmaxps %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = fadd <16 x float> %x0, %x1
%cmp1 = fcmp ogt <16 x float> %x2, %t0
@@ -1001,17 +1001,17 @@ define <8 x double> @reassociate_mins_v8f64(<8 x double> %x0, <8 x double> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vminpd %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vminpd %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vminpd %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vminpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vminpd %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vminpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vminpd %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vminpd %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_mins_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vminpd %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vminpd %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vminpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = fadd <8 x double> %x0, %x1
%cmp1 = fcmp olt <8 x double> %x2, %t0
@@ -1044,17 +1044,17 @@ define <8 x double> @reassociate_maxs_v8f64(<8 x double> %x0, <8 x double> %x1,
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vmaxpd %ymm6, %ymm4, %ymm2
-; AVX1-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vmaxpd %ymm7, %ymm5, %ymm2
-; AVX1-NEXT: vmaxpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vmaxpd %ymm4, %ymm6, %ymm2
+; AVX1-NEXT: vmaxpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vmaxpd %ymm5, %ymm7, %ymm2
+; AVX1-NEXT: vmaxpd %ymm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_maxs_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vmaxpd %zmm3, %zmm2, %zmm1
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmaxpd %zmm2, %zmm3, %zmm1
+; AVX512-NEXT: vmaxpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%t0 = fadd <8 x double> %x0, %x1
%cmp1 = fcmp ogt <8 x double> %x2, %t0
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 4785531945b05..fd568a4969cd5 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -2690,8 +2690,8 @@ define i32 @madd_quad_reduction(<8 x i16>* %arg, <8 x i16>* %arg1, <8 x i16>* %a
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vmovdqu (%r10), %xmm2
; AVX-NEXT: vpmaddwd (%rax), %xmm2, %xmm2
-; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index f5e520e9b48b3..7da53c86b42c3 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -502,11 +502,11 @@ define <8 x i32> @test9(ptr %base, <8 x i64> %ind1, <8 x i32>%ind5) {
; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0
-; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; KNL_64-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; KNL_64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; KNL_64-NEXT: vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
-; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm4, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1}
@@ -586,11 +586,11 @@ define <8 x i32> @test10(ptr %base, <8 x i64> %i1, <8 x i32>%ind5) {
; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0
; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0
; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0
-; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; KNL_64-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; KNL_64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; KNL_64-NEXT: vpmuldq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm1
-; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm4, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT: vpgatherqd 72(,%zmm1), %ymm0 {%k1}
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index c65858586f0db..031561514f190 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -1000,8 +1000,8 @@ define i64 @test_mul_by_29(i64 %x) {
; X86-NEXT: addl %ecx, %ecx
; X86-NEXT: movl $29, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
; X86-NEXT: addl %esi, %edx
+; X86-NEXT: addl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/pr46877.ll b/llvm/test/CodeGen/X86/pr46877.ll
index cfd39672ef910..798663c1d4dca 100644
--- a/llvm/test/CodeGen/X86/pr46877.ll
+++ b/llvm/test/CodeGen/X86/pr46877.ll
@@ -13,7 +13,7 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm10
; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm10 = (xmm3 * xmm10) - xmm0
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm12 * xmm5) + xmm0
-; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm2
+; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm2
; CHECK-NEXT: vmulss %xmm2, %xmm10, %xmm4
; CHECK-NEXT: vmulss %xmm6, %xmm12, %xmm2
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm7 * xmm2) + xmm0
@@ -24,33 +24,33 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * mem) + xmm0
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm7, %xmm3
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm3 = -(xmm3 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm3
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm4
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm14 * xmm4) + xmm0
-; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm4
+; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm5 = -(xmm5 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm5, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm5, %xmm2
; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm7, %xmm5
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm9 * xmm5) + xmm0
-; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss %xmm0, %xmm5, %xmm8
; CHECK-NEXT: vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: vmovaps %xmm5, %xmm10
; CHECK-NEXT: vmulss %xmm14, %xmm8, %xmm5
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm12 * xmm5) + xmm0
-; CHECK-NEXT: vmulss %xmm5, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm5, %xmm2
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm5 = -(xmm13 * xmm5) + xmm0
-; CHECK-NEXT: vmulss %xmm5, %xmm4, %xmm4
+; CHECK-NEXT: vmulss %xmm4, %xmm5, %xmm4
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm11
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm11 * xmm3) + xmm0
-; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
-; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm15 * xmm3) + xmm0
@@ -62,8 +62,8 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm8, %xmm6
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm6, %xmm4, %xmm4
-; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm4, %xmm6, %xmm4
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
@@ -75,8 +75,8 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm4 = -(xmm1 * xmm4) + xmm0
; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm6, %xmm4, %xmm4
-; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm4, %xmm6, %xmm4
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm1
@@ -88,7 +88,7 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm10 = -(xmm10 * mem) + xmm0
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm13, %xmm12
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm12 = -(xmm7 * xmm12) + xmm0
-; CHECK-NEXT: vmulss %xmm12, %xmm10, %xmm10
+; CHECK-NEXT: vmulss %xmm10, %xmm12, %xmm10
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm4, %xmm4
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm4, %xmm4
; CHECK-NEXT: vmulss %xmm4, %xmm10, %xmm12
@@ -100,17 +100,17 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm9
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm1
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm1 = -(xmm1 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm6, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm6, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm6
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm6 = -(xmm6 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
-; CHECK-NEXT: vmulss %xmm6, %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm6, %xmm1
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm3, %xmm2
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm13 * xmm2) + xmm0
-; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm12, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
-; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm4
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm6, %xmm3
@@ -120,24 +120,24 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
-; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm3 * xmm1) + xmm0
-; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm4, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm2, %xmm2
-; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 4-byte Reload
; CHECK-NEXT: # xmm10 = mem[0],zero,zero,zero
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm10, %xmm2
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm7 = -(xmm7 * mem) + xmm0
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm12 * xmm2) + xmm0
-; CHECK-NEXT: vmulss %xmm7, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm7, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm8 = -(xmm8 * mem) + xmm0
; CHECK-NEXT: vmulss %xmm2, %xmm8, %xmm2
-; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm15 * xmm2) + xmm0
@@ -147,13 +147,13 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * mem) + xmm0
; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm6 * xmm3) + xmm0
-; CHECK-NEXT: vmulss %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vmulss %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm3
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm9, %xmm4
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
-; CHECK-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm2, %xmm1
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm11 = -(xmm6 * xmm11) + xmm0
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm5, %xmm2
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm2 = -(xmm15 * xmm2) + xmm0
@@ -161,7 +161,7 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm4, %xmm4
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm4 = -(xmm4 * mem) + xmm0
; CHECK-NEXT: vmulss %xmm2, %xmm11, %xmm2
-; CHECK-NEXT: vmulss %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vmulss %xmm2, %xmm4, %xmm2
; CHECK-NEXT: vfnmadd132ss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm14 # 4-byte Folded Reload
; CHECK-NEXT: # xmm14 = -(xmm14 * mem) + xmm0
; CHECK-NEXT: vmulss %xmm2, %xmm14, %xmm4
@@ -188,18 +188,18 @@ define void @tester(float %0, float %1, float %2, float %3, float %4, float %5,
; CHECK-NEXT: vfnmadd213ss {{.*#+}} xmm10 = -(xmm12 * xmm10) + xmm0
; CHECK-NEXT: vfmsub213ss {{.*#+}} xmm8 = (xmm15 * xmm8) - xmm0
; CHECK-NEXT: vfnmadd132ss {{.*#+}} xmm3 = -(xmm3 * mem) + xmm0
-; CHECK-NEXT: vmulss %xmm3, %xmm4, %xmm0
-; CHECK-NEXT: vmulss %xmm5, %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vmulss %xmm0, %xmm5, %xmm0
; CHECK-NEXT: vmulss %xmm0, %xmm9, %xmm0
-; CHECK-NEXT: vmulss %xmm7, %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm0, %xmm7, %xmm0
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm1, %xmm1
-; CHECK-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: vmulss %xmm6, %xmm2, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm2, %xmm6, %xmm1
; CHECK-NEXT: vmulss %xmm1, %xmm11, %xmm1
; CHECK-NEXT: vmulss %xmm1, %xmm10, %xmm1
; CHECK-NEXT: vmulss {{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-NEXT: vmulss %xmm1, %xmm8, %xmm1
-; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmulss %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vmovss %xmm0, (%rdi)
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index e0c903d96f5c8..333aebc503f58 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -598,8 +598,8 @@ define float @div_sqrt_fabs_f32(float %x, float %y, float %z) {
; AVX1-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vmulss %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: div_sqrt_fabs_f32:
@@ -610,8 +610,8 @@ define float @div_sqrt_fabs_f32(float %x, float %y, float %z) {
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vmulss %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%s = call fast float @llvm.sqrt.f32(float %z)
%a = call fast float @llvm.fabs.f32(float %y)
@@ -778,8 +778,8 @@ define float @div_sqrt_f32(float %x, float %y) {
; AVX1-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vmulss %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: div_sqrt_f32:
@@ -790,8 +790,8 @@ define float @div_sqrt_f32(float %x, float %y) {
; AVX512-NEXT: vmulss %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vmulss %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%s = call fast float @llvm.sqrt.f32(float %y)
%m = fmul fast float %s, %y
diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
index d0deed539bc52..644ecee4f1ca8 100644
--- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll
@@ -143,11 +143,11 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; X86-NEXT: setne %al
; X86-NEXT: andb %bl, %al
; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
-; X86-NEXT: orb %bh, %al
-; X86-NEXT: andb $1, %al
-; X86-NEXT: movb %al, 16(%ecx)
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %bh # 1-byte Folded Reload
+; X86-NEXT: orb %al, %bh
+; X86-NEXT: andb $1, %bh
+; X86-NEXT: movb %bh, 16(%ecx)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: addl $24, %esp
; X86-NEXT: .cfi_def_cfa_offset 20
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index b7a2daf0615e5..a6b919a50140a 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -841,10 +841,10 @@ define <32 x i8> @interleaved_load_vf32_i8_stride3(ptr %ptr){
; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[11,12,13,14,15],xmm7[0,1,2,3,4,5,6,7,8,9,10]
; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddb %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -866,10 +866,10 @@ define <32 x i8> @interleaved_load_vf32_i8_stride3(ptr %ptr){
; AVX2OR512-NEXT: vpalignr {{.*#+}} ymm2 = ymm3[11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10],ymm3[27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26]
; AVX2OR512-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0]
; AVX2OR512-NEXT: vpblendvb %ymm4, %ymm0, %ymm1, %ymm1
-; AVX2OR512-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; AVX2OR512-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX2OR512-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[11,12,13,14,15],ymm3[0,1,2,3,4,5,6,7,8,9,10],ymm0[27,28,29,30,31],ymm3[16,17,18,19,20,21,22,23,24,25,26]
; AVX2OR512-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
-; AVX2OR512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX2OR512-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX2OR512-NEXT: retq
%wide.vec = load <96 x i8>, ptr %ptr
%v1 = shufflevector <96 x i8> %wide.vec, <96 x i8> undef,<32 x i32> <i32 0,i32 3,i32 6,i32 9,i32 12,i32 15,i32 18,i32 21,i32 24,i32 27,i32 30,i32 33,i32 36,i32 39,i32 42,i32 45,i32 48,i32 51,i32 54,i32 57,i32 60,i32 63,i32 66,i32 69,i32 72,i32 75,i32 78,i32 81,i32 84,i32 87,i32 90,i32 93>
@@ -896,10 +896,10 @@ define <16 x i8> @interleaved_load_vf16_i8_stride3(ptr %ptr){
; AVX-NEXT: vpalignr {{.*#+}} xmm2 = xmm3[11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10]
; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0]
; AVX-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm1
-; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10]
; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%wide.vec = load <48 x i8>, ptr %ptr
%v1 = shufflevector <48 x i8> %wide.vec, <48 x i8> undef,<16 x i32> <i32 0,i32 3,i32 6,i32 9,i32 12,i32 15,i32 18,i32 21,i32 24,i32 27,i32 30,i32 33,i32 36,i32 39,i32 42 ,i32 45>
@@ -924,8 +924,8 @@ define <8 x i8> @interleaved_load_vf8_i8_stride3(ptr %ptr){
; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,xmm1[1,4,7,u,u,u,u,u,u,u,u]
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,5,8,11,14],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpaddb %xmm0, %xmm3, %xmm0
-; AVX-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%wide.vec = load <24 x i8>, ptr %ptr
%v1 = shufflevector <24 x i8> %wide.vec, <24 x i8> undef,<8 x i32> <i32 0,i32 3,i32 6,i32 9,i32 12,i32 15,i32 18,i32 21>
@@ -1366,44 +1366,44 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
; AVX1-NEXT: vmovdqa {{.*#+}} xmm11 = [128,128,128,128,128,128,128,128,128,128,128,2,5,8,11,14]
; AVX1-NEXT: vpshufb %xmm11, %xmm4, %xmm4
; AVX1-NEXT: vpor %xmm4, %xmm10, %xmm4
-; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm9, %xmm3, %xmm5
; AVX1-NEXT: vpshufb %xmm11, %xmm8, %xmm8
; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm5
-; AVX1-NEXT: vpaddb %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpaddb %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm9, %xmm3, %xmm5
; AVX1-NEXT: vpshufb %xmm11, %xmm12, %xmm8
; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm5
-; AVX1-NEXT: vpaddb %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpaddb %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm9, %xmm3, %xmm5
; AVX1-NEXT: vpshufb %xmm11, %xmm15, %xmm8
; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm5
-; AVX1-NEXT: vpaddb %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpaddb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4]
; AVX1-NEXT: vpshufb %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128]
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm8, %xmm3, %xmm3
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpaddb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpshufb %xmm6, %xmm7, %xmm3
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm8, %xmm4, %xmm4
; AVX1-NEXT: vpor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm6, %xmm14, %xmm3
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm8, %xmm4, %xmm4
; AVX1-NEXT: vpor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm6, %xmm13, %xmm3
; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; AVX1-NEXT: vpshufb %xmm8, %xmm4, %xmm4
; AVX1-NEXT: vpor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpaddb %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: retq
@@ -1456,11 +1456,11 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm7 = [255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255]
; AVX2-NEXT: # ymm7 = mem[0,1,0,1]
; AVX2-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpalignr {{.*#+}} ymm2 = ymm5[5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,21,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20]
; AVX2-NEXT: vpshufb %ymm6, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpaddb %ymm1, %ymm3, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: interleaved_load_vf64_i8_stride3:
@@ -1491,10 +1491,10 @@ define <64 x i8> @interleaved_load_vf64_i8_stride3(ptr %ptr){
; AVX512-NEXT: kmovq %rax, %k1
; AVX512-NEXT: vpblendmb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512-NEXT: vpalignr {{.*#+}} zmm1 = zmm3[11,12,13,14,15],zmm1[0,1,2,3,4,5,6,7,8,9,10],zmm3[27,28,29,30,31],zmm1[16,17,18,19,20,21,22,23,24,25,26],zmm3[43,44,45,46,47],zmm1[32,33,34,35,36,37,38,39,40,41,42],zmm3[59,60,61,62,63],zmm1[48,49,50,51,52,53,54,55,56,57,58]
-; AVX512-NEXT: vpaddb %zmm1, %zmm2, %zmm1
+; AVX512-NEXT: vpaddb %zmm2, %zmm1, %zmm1
; AVX512-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[11,12,13,14,15],zmm3[0,1,2,3,4,5,6,7,8,9,10],zmm0[27,28,29,30,31],zmm3[16,17,18,19,20,21,22,23,24,25,26],zmm0[43,44,45,46,47],zmm3[32,33,34,35,36,37,38,39,40,41,42],zmm0[59,60,61,62,63],zmm3[48,49,50,51,52,53,54,55,56,57,58]
; AVX512-NEXT: vpalignr {{.*#+}} zmm0 = zmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25,42,43,44,45,46,47,32,33,34,35,36,37,38,39,40,41,58,59,60,61,62,63,48,49,50,51,52,53,54,55,56,57]
-; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512-NEXT: retq
%wide.vec = load <192 x i8>, ptr %ptr, align 1
%v1 = shufflevector <192 x i8> %wide.vec, <192 x i8> undef, <64 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45, i32 48, i32 51, i32 54, i32 57, i32 60, i32 63, i32 66, i32 69, i32 72, i32 75, i32 78, i32 81, i32 84, i32 87, i32 90, i32 93, i32 96, i32 99, i32 102, i32 105, i32 108, i32 111, i32 114, i32 117, i32 120, i32 123, i32 126, i32 129, i32 132, i32 135, i32 138, i32 141, i32 144, i32 147, i32 150, i32 153, i32 156, i32 159, i32 162, i32 165, i32 168, i32 171, i32 174, i32 177, i32 180, i32 183, i32 186, i32 189>