[llvm] bd7096b - [PowerPC] fma chain break to expose more ILP
Chen Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 14 21:10:53 PDT 2020
Author: Chen Zheng
Date: 2020-06-15T00:00:04-04:00
New Revision: bd7096b977e11293b5a7406c4846ac805197fca5
URL: https://github.com/llvm/llvm-project/commit/bd7096b977e11293b5a7406c4846ac805197fca5
DIFF: https://github.com/llvm/llvm-project/commit/bd7096b977e11293b5a7406c4846ac805197fca5.diff
LOG: [PowerPC] fma chain break to expose more ILP
This patch tries to reassociate two FMA-related patterns to expose
more ILP on PowerPC (an illustrative sketch follows the patterns below).
// Pattern 1:
// A = FADD X, Y (Leaf)
// B = FMA A, M21, M22 (Prev)
// C = FMA B, M31, M32 (Root)
// -->
// A = FMA X, M21, M22
// B = FMA Y, M31, M32
// C = FADD A, B
// Pattern 2:
// A = FMA X, M11, M12 (Leaf)
// B = FMA A, M21, M22 (Prev)
// C = FMA B, M31, M32 (Root)
// -->
// A = FMUL M11, M12
// B = FMA X, M21, M22
// D = FMA A, M31, M32
// C = FADD B, D
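As an illustration (a minimal sketch, not part of the patch; the function and
value names are hypothetical), IR of the following shape turns into the
Pattern 1 chain once the multiplies and adds are contracted into FMAs
(e.g. with fp-contract=fast, as in the machine-combiner.ll test):

  define double @fma_chain(double %x, double %y, double %m21, double %m22,
                           double %m31, double %m32) {
    %a  = fadd reassoc nsz double %x, %y
    %t1 = fmul reassoc nsz double %m21, %m22
    %b  = fadd reassoc nsz double %a, %t1    ; becomes B = FMA A, M21, M22
    %t2 = fmul reassoc nsz double %m31, %m32
    %c  = fadd reassoc nsz double %b, %t2    ; becomes C = FMA B, M31, M32
    ret double %c
  }

Before the transformation the two FMAs form a serial dependence chain through
A and B; after it, each FMA depends only on %x or %y, so the two can execute
in parallel, with a final FADD combining the results.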
Reviewed By: jsji
Differential Revision: https://reviews.llvm.org/D80175
Added:
Modified:
llvm/include/llvm/CodeGen/MachineCombinerPattern.h
llvm/lib/CodeGen/MachineCombiner.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/test/CodeGen/PowerPC/machine-combiner.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 149fe043d1f5..e9f52fb064e1 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -25,6 +25,10 @@ enum class MachineCombinerPattern {
REASSOC_XA_BY,
REASSOC_XA_YB,
+ // These are patterns matched by the PowerPC machine combiner to reassociate FMA chains.
+ REASSOC_XY_AMM_BMM,
+ REASSOC_XMM_AMM_BMM,
+
// These are multiply-add patterns matched by the AArch64 machine combiner.
MULADDW_OP1,
MULADDW_OP2,
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 34087d0491bd..f241435a0482 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -269,6 +269,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
case MachineCombinerPattern::REASSOC_AX_YB:
case MachineCombinerPattern::REASSOC_XA_BY:
case MachineCombinerPattern::REASSOC_XA_YB:
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
return CombinerObjective::MustReduceDepth;
default:
return CombinerObjective::Default;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index e70f04e72ed5..20b50a954955 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -280,6 +280,144 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
}
}
+#define InfoArrayIdxFMAInst 0
+#define InfoArrayIdxFAddInst 1
+#define InfoArrayIdxFMULInst 2
+#define InfoArrayIdxAddOpIdx 3
+#define InfoArrayIdxMULOpIdx 4
+// Array keeps info for FMA instructions:
+// Index 0(InfoArrayIdxFMAInst): FMA instruction;
+// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
+// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
+// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in the FMA operand list;
+// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in the FMA operand
+// list;
+// second MUL operand index is plus 1.
+static const uint16_t FMAOpIdxInfo[][5] = {
+ // FIXME: add more FMA instructions like XSNMADDADP and so on.
+ {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2},
+ {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2},
+ {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
+ {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
+ {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
+ {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
+ {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
+ {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
+
+// Check if an opcode is an FMA instruction. If it is, return its index in the
+// FMAOpIdxInfo array. Otherwise, return -1.
+int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
+ for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++)
+ if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
+ return I;
+ return -1;
+}
+
+// Try to reassociate FMA chains like below:
+//
+// Pattern 1:
+// A = FADD X, Y (Leaf)
+// B = FMA A, M21, M22 (Prev)
+// C = FMA B, M31, M32 (Root)
+// -->
+// A = FMA X, M21, M22
+// B = FMA Y, M31, M32
+// C = FADD A, B
+//
+// Pattern 2:
+// A = FMA X, M11, M12 (Leaf)
+// B = FMA A, M21, M22 (Prev)
+// C = FMA B, M31, M32 (Root)
+// -->
+// A = FMUL M11, M12
+// B = FMA X, M21, M22
+// D = FMA A, M31, M32
+// C = FADD B, D
+//
+// This breaks the dependency between A and B, allowing the FMAs to be
+// executed in parallel (or back-to-back in a pipeline) instead of depending on
+// each other.
+bool PPCInstrInfo::getFMAPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ MachineBasicBlock *MBB = Root.getParent();
+ const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
+ for (const auto &MO : Instr.explicit_operands())
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ return false;
+ return true;
+ };
+
+ auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
+ bool IsLeaf, bool IsAdd) {
+ int16_t Idx = -1;
+ if (!IsAdd) {
+ Idx = getFMAOpIdxInfo(Instr.getOpcode());
+ if (Idx < 0)
+ return false;
+ } else if (Instr.getOpcode() !=
+ FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())]
+ [InfoArrayIdxFAddInst])
+ return false;
+
+ // Instruction can be reassociated.
+ // Fast math flags may prohibit reassociation.
+ if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
+ return false;
+
+ // All instruction operands must be virtual registers for reassociation.
+ if (!IsAllOpsVirtualReg(Instr))
+ return false;
+
+ if (IsAdd && IsLeaf)
+ return true;
+
+ AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+
+ const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
+ MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg());
+ // If the 'add' operand's def is not in the current block, don't do this ILP-related opt.
+ if (!MIAdd || MIAdd->getParent() != MBB)
+ return false;
+
+ // If this is not the leaf FMA instruction, its 'add' operand must have only
+ // one use, as this FMA will be changed later.
+ return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg());
+ };
+
+ int16_t AddOpIdx = -1;
+ // Root must be a valid FMA-like instruction.
+ if (!IsReassociable(Root, AddOpIdx, false, false))
+ return false;
+
+ assert((AddOpIdx >= 0) && "add operand index not right!");
+
+ Register RegB = Root.getOperand(AddOpIdx).getReg();
+ MachineInstr *Prev = MRI.getUniqueVRegDef(RegB);
+
+ // Prev must be a valid FMA-like instruction.
+ AddOpIdx = -1;
+ if (!IsReassociable(*Prev, AddOpIdx, false, false))
+ return false;
+
+ assert((AddOpIdx >= 0) && "add operand index not right!");
+
+ Register RegA = Prev->getOperand(AddOpIdx).getReg();
+ MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA);
+ AddOpIdx = -1;
+ if (IsReassociable(*Leaf, AddOpIdx, true, false)) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+ return true;
+ }
+ if (IsReassociable(*Leaf, AddOpIdx, true, true)) {
+ Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+ return true;
+ }
+ return false;
+}
+
bool PPCInstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
@@ -288,9 +426,198 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
return false;
+ if (getFMAPatterns(Root, Patterns))
+ return true;
+
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
}
+void PPCInstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ switch (Pattern) {
+ case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+ case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+ reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
+ break;
+ default:
+ // Reassociate default patterns.
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+ DelInstrs, InstrIdxForVirtReg);
+ break;
+ }
+}
+
+// Currently, we only handle the two patterns REASSOC_XY_AMM_BMM and
+// REASSOC_XMM_AMM_BMM. See the comments for getFMAPatterns.
+void PPCInstrInfo::reassociateFMA(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineOperand &OpC = Root.getOperand(0);
+ Register RegC = OpC.getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(RegC);
+ MRI.constrainRegClass(RegC, RC);
+
+ unsigned FmaOp = Root.getOpcode();
+ int16_t Idx = getFMAOpIdxInfo(FmaOp);
+ assert(Idx >= 0 && "Root must be a FMA instruction");
+
+ uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+ uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+ MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
+ MachineInstr *Leaf =
+ MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
+ uint16_t IntersectedFlags =
+ Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+
+ auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
+ bool &KillFlag) {
+ Reg = Operand.getReg();
+ MRI.constrainRegClass(Reg, RC);
+ KillFlag = Operand.isKill();
+ };
+
+ auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
+ Register &MulOp2, bool &MulOp1KillFlag,
+ bool &MulOp2KillFlag) {
+ GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
+ GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
+ };
+
+ Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32;
+ bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
+ KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false;
+
+ GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32);
+ GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22);
+
+ if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12);
+ GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
+ } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
+ GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
+ }
+
+ // Create new virtual registers for the new results instead of
+ // recycling legacy ones because the MachineCombiner's computation of the
+ // critical path requires a new register definition rather than an existing
+ // one.
+ Register NewVRA = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
+
+ Register NewVRB = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+
+ Register NewVRD = 0;
+ if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ NewVRD = MRI.createVirtualRegister(RC);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
+ }
+
+ auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
+ Register RegMul1, bool KillRegMul1,
+ Register RegMul2, bool KillRegMul2) {
+ MI->getOperand(AddOpIdx).setReg(RegAdd);
+ MI->getOperand(AddOpIdx).setIsKill(KillAdd);
+ MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
+ MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
+ MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
+ MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
+ };
+
+ if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+ // Create new instructions for insertion.
+ MachineInstrBuilder MINewB =
+ BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegM21, getKillRegState(KillM21))
+ .addReg(RegM22, getKillRegState(KillM22));
+ MachineInstrBuilder MINewA =
+ BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
+ .addReg(RegY, getKillRegState(KillY))
+ .addReg(RegM31, getKillRegState(KillM31))
+ .addReg(RegM32, getKillRegState(KillM32));
+ // If AddOpIdx is not 1, adjust the order.
+ if (AddOpIdx != 1) {
+ AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+ AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
+ }
+
+ MachineInstrBuilder MINewC =
+ BuildMI(*MF, Root.getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+ .addReg(NewVRB, getKillRegState(true))
+ .addReg(NewVRA, getKillRegState(true));
+
+ // Update flags for the newly created instructions.
+ setSpecialOperandAttr(*MINewA, IntersectedFlags);
+ setSpecialOperandAttr(*MINewB, IntersectedFlags);
+ setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+ // Record new instructions for insertion.
+ InsInstrs.push_back(MINewA);
+ InsInstrs.push_back(MINewB);
+ InsInstrs.push_back(MINewC);
+ } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+ assert(NewVRD && "new FMA register not created!");
+ // Create new instructions for insertion.
+ MachineInstrBuilder MINewA =
+ BuildMI(*MF, Leaf->getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
+ .addReg(RegM11, getKillRegState(KillM11))
+ .addReg(RegM12, getKillRegState(KillM12));
+ MachineInstrBuilder MINewB =
+ BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+ .addReg(RegX, getKillRegState(KillX))
+ .addReg(RegM21, getKillRegState(KillM21))
+ .addReg(RegM22, getKillRegState(KillM22));
+ MachineInstrBuilder MINewD =
+ BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
+ .addReg(NewVRA, getKillRegState(true))
+ .addReg(RegM31, getKillRegState(KillM31))
+ .addReg(RegM32, getKillRegState(KillM32));
+ // If AddOpIdx is not 1, adjust the order.
+ if (AddOpIdx != 1) {
+ AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+ AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
+ KillM32);
+ }
+
+ MachineInstrBuilder MINewC =
+ BuildMI(*MF, Root.getDebugLoc(),
+ get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+ .addReg(NewVRB, getKillRegState(true))
+ .addReg(NewVRD, getKillRegState(true));
+
+ // Update flags for the newly created instructions.
+ setSpecialOperandAttr(*MINewA, IntersectedFlags);
+ setSpecialOperandAttr(*MINewB, IntersectedFlags);
+ setSpecialOperandAttr(*MINewD, IntersectedFlags);
+ setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+ // Record new instructions for insertion.
+ InsInstrs.push_back(MINewA);
+ InsInstrs.push_back(MINewB);
+ InsInstrs.push_back(MINewD);
+ InsInstrs.push_back(MINewC);
+ }
+
+ assert(!InsInstrs.empty() &&
+ "Insertion instructions set should not be empty!");
+
+ // Record old instructions for deletion.
+ DelInstrs.push_back(Leaf);
+ DelInstrs.push_back(Prev);
+ DelInstrs.push_back(&Root);
+}
+
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 3d6f7f1af346..ca30ced9d404 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -229,6 +229,11 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
+ int16_t getFMAOpIdxInfo(unsigned Opcode) const;
+ void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
virtual void anchor();
protected:
@@ -308,6 +313,20 @@ class PPCInstrInfo : public PPCGenInstrInfo {
return true;
}
+ /// When getMachineCombinerPatterns() finds patterns, this function generates
+ /// the instructions that could replace the original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+ /// Return true when there is potentially a faster code sequence for an FMA
+ /// chain ending in \p Root. All potential patterns are output in the \p
+ /// P array.
+ bool getFMAPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P) const;
+
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in <Root>. All potential patterns are
/// output in the <Pattern> array.
@@ -317,6 +336,16 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+ /// On PowerPC, we try to reassociate FMA chains, which will increase the
+ /// instruction count. Set the extension resource length limit to 1 for this
+ /// edge case. Resource length is calculated from scaled resource usage in
+ /// getCycles(). Because of the division in getCycles(), it can return
+ /// different cycles than the legacy scaled resource usage, so the new
+ /// resource length may be the same as the legacy one or 1 larger than it.
+ /// We need to allow the "1 larger" case, even though the resource length is
+ /// not preserved, to enable more FMA chain reassociations on PowerPC.
+ int getExtendResourceLenLimit() const override { return 1; }
+
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const override;
diff --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
index 7206d5e4dfaa..118d1da969b6 100644
--- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -217,12 +217,12 @@ define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
; CHECK-LABEL: reassociate_mamaa_double:
; CHECK: # %bb.0:
-; CHECK-QPX: fadd 0, 2, 1
-; CHECK-QPX: fmadd 0, 4, 3, 0
-; CHECK-QPX: fmadd 1, 6, 5, 0
-; CHECK-PWR: xsadddp 1, 2, 1
-; CHECK-PWR: xsmaddadp 1, 4, 3
-; CHECK-PWR: xsmaddadp 1, 6, 5
+; CHECK-QPX-DAG: fmadd [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-QPX-DAG: fmadd [[REG1:[0-9]+]], 6, 5, 1
+; CHECK-QPX: fadd 1, [[REG0]], [[REG1]]
+; CHECK-PWR-DAG: xsmaddadp 1, 6, 5
+; CHECK-PWR-DAG: xsmaddadp 2, 4, 3
+; CHECK-PWR: xsadddp 1, 2, 1
; CHECK-NEXT: blr
%7 = fmul reassoc nsz double %3, %2
%8 = fmul reassoc nsz double %5, %4
@@ -235,10 +235,10 @@ define double @reassociate_mamaa_double(double %0, double %1, double %2, double
; FIXME: should use xsmaddasp instead of fmadds for pwr7 arch.
define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, float %4, float %5) {
; CHECK-LABEL: reassociate_mamaa_float:
-; CHECK: # %bb.0:
-; CHECK: fadds 0, 2, 1
-; CHECK: fmadds 0, 4, 3, 0
-; CHECK: fmadds 1, 6, 5, 0
+; CHECK: # %bb.0:
+; CHECK-DAG: fmadds [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-DAG: fmadds [[REG1:[0-9]+]], 6, 5, 1
+; CHECK: fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT: blr
%7 = fmul reassoc nsz float %3, %2
%8 = fmul reassoc nsz float %5, %4
@@ -251,12 +251,12 @@ define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, fl
define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
; CHECK-LABEL: reassociate_mamaa_vec:
; CHECK: # %bb.0:
-; CHECK-QPX: qvfadds 0, 2, 1
-; CHECK-QPX: qvfmadds 0, 4, 3, 0
-; CHECK-QPX: qvfmadds 1, 6, 5, 0
-; CHECK-PWR: xvaddsp 34, 35, 34
-; CHECK-PWR: xvmaddasp 34, 37, 36
-; CHECK-PWR: xvmaddasp 34, 39, 38
+; CHECK-QPX-DAG: qvfmadds [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-QPX-DAG: qvfmadds [[REG1:[0-9]+]], 6, 5, 1
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR-DAG: xvmaddasp [[REG0:[0-9]+]], 39, 38
+; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36
+; CHECK-PWR: xvaddsp 34, [[REG1]], [[REG0]]
; CHECK-NEXT: blr
%7 = fmul reassoc nsz <4 x float> %3, %2
%8 = fmul reassoc nsz <4 x float> %5, %4
@@ -269,15 +269,16 @@ define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x f
define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
; CHECK-LABEL: reassociate_mamama_double:
; CHECK: # %bb.0:
-; CHECK-QPX: fmadd 0, 2, 1, 7
-; CHECK-QPX-DAG: fmadd 0, 4, 3, 0
-; CHECK-QPX-DAG: fmadd 0, 6, 5, 0
-; CHECK-QPX: fmadd 1, 9, 8, 0
+; CHECK-QPX: fmadd [[REG0:[0-9]+]], 2, 1, 7
+; CHECK-QPX-DAG: fmul [[REG1:[0-9]+]], 4, 3
+; CHECK-QPX-DAG: fmadd [[REG2:[0-9]+]], 6, 5, [[REG0]]
+; CHECK-QPX-DAG: fmadd [[REG3:[0-9]+]], 9, 8, [[REG1]]
+; CHECK-QPX: fadd 1, [[REG2]], [[REG3]]
; CHECK-PWR: xsmaddadp 7, 2, 1
-; CHECK-PWR-DAG: xsmaddadp 7, 4, 3
+; CHECK-PWR-DAG: xsmuldp [[REG0:[0-9]+]], 4, 3
; CHECK-PWR-DAG: xsmaddadp 7, 6, 5
-; CHECK-PWR-DAG: xsmaddadp 7, 9, 8
-; CHECK-PWR: fmr 1, 7
+; CHECK-PWR-DAG: xsmaddadp [[REG0]], 9, 8
+; CHECK-PWR: xsadddp 1, 7, [[REG0]]
; CHECK-NEXT: blr
%10 = fmul reassoc nsz double %1, %0
%11 = fmul reassoc nsz double %3, %2
@@ -295,16 +296,18 @@ define dso_local float @reassociate_mamama_8(float %0, float %1, float %2, float
float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16) {
; CHECK-LABEL: reassociate_mamama_8:
; CHECK: # %bb.0:
-; CHECK: fmadds [[REG0:[0-9]+]], 3, 2, 1
-; CHECK-DAG: fmadds [[REG0]], 5, 4, [[REG0]]
-; CHECK-DAG: fmadds [[REG0]], 7, 6, [[REG0]]
-; CHECK-DAG: fmadds [[REG0]], 9, 8, [[REG0]]
-; CHECK-DAG: fmadds [[REG0]], 13, 12, [[REG0]]
-; CHECK-DAG: fmadds [[REG0]], 11, 10, [[REG0]]
+; CHECK-DAG: fmadds [[REG0:[0-9]+]], 3, 2, 1
+; CHECK-DAG: fmuls [[REG1:[0-9]+]], 5, 4
+; CHECK-DAG: fmadds [[REG2:[0-9]+]], 7, 6, [[REG0]]
+; CHECK-DAG: fmadds [[REG3:[0-9]+]], 9, 8, [[REG1]]
;
-; CHECK: fmadds [[REG0]],
-; CHECK: fmadds 1,
-; CHECK-NEXT: blr
+; CHECK-DAG: fmadds [[REG4:[0-9]+]], 13, 12, [[REG3]]
+; CHECK-DAG: fmadds [[REG5:[0-9]+]], 11, 10, [[REG2]]
+;
+; CHECK-DAG: fmadds [[REG6:[0-9]+]], 3, 2, [[REG4]]
+; CHECK-DAG: fmadds [[REG7:[0-9]+]], 5, 4, [[REG5]]
+; CHECK: fadds 1, [[REG7]], [[REG6]]
+; CHECK-NEXT: blr
%18 = fmul reassoc nsz float %2, %1
%19 = fadd reassoc nsz float %18, %0
%20 = fmul reassoc nsz float %4, %3