[llvm] bd7096b - [PowerPC] fma chain break to expose more ILP

Chen Zheng via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 14 21:10:53 PDT 2020


Author: Chen Zheng
Date: 2020-06-15T00:00:04-04:00
New Revision: bd7096b977e11293b5a7406c4846ac805197fca5

URL: https://github.com/llvm/llvm-project/commit/bd7096b977e11293b5a7406c4846ac805197fca5
DIFF: https://github.com/llvm/llvm-project/commit/bd7096b977e11293b5a7406c4846ac805197fca5.diff

LOG: [PowerPC] fma chain break to expose more ILP

This patch tries to reassociate two patterns related to FMA to expose
more ILP on PowerPC.

// Pattern 1:
//   A =  FADD X,  Y          (Leaf)
//   B =  FMA  A,  M21,  M22  (Prev)
//   C =  FMA  B,  M31,  M32  (Root)
// -->
//   A =  FMA  X,  M21,  M22
//   B =  FMA  Y,  M31,  M32
//   C =  FADD A,  B

// Pattern 2:
//   A =  FMA  X,  M11,  M12  (Leaf)
//   B =  FMA  A,  M21,  M22  (Prev)
//   C =  FMA  B,  M31,  M32  (Root)
// -->
//   A =  FMUL M11,  M12
//   B =  FMA  X,  M21,  M22
//   D =  FMA  A,  M31,  M32
//   C =  FADD B,  D

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D80175

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/MachineCombinerPattern.h
    llvm/lib/CodeGen/MachineCombiner.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/test/CodeGen/PowerPC/machine-combiner.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 149fe043d1f5..e9f52fb064e1 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -25,6 +25,10 @@ enum class MachineCombinerPattern {
   REASSOC_XA_BY,
   REASSOC_XA_YB,
 
+  // These are patterns matched by the PowerPC to reassociate FMA chains.
+  REASSOC_XY_AMM_BMM,
+  REASSOC_XMM_AMM_BMM,
+
   // These are multiply-add patterns matched by the AArch64 machine combiner.
   MULADDW_OP1,
   MULADDW_OP2,

diff  --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 34087d0491bd..f241435a0482 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -269,6 +269,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
   case MachineCombinerPattern::REASSOC_AX_YB:
   case MachineCombinerPattern::REASSOC_XA_BY:
   case MachineCombinerPattern::REASSOC_XA_YB:
+  case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
     return CombinerObjective::MustReduceDepth;
   default:
     return CombinerObjective::Default;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index e70f04e72ed5..20b50a954955 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -280,6 +280,144 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
   }
 }
 
+#define InfoArrayIdxFMAInst 0
+#define InfoArrayIdxFAddInst 1
+#define InfoArrayIdxFMULInst 2
+#define InfoArrayIdxAddOpIdx 3
+#define InfoArrayIdxMULOpIdx 4
+// Array keeps info for FMA instructions:
+// Index 0(InfoArrayIdxFMAInst): FMA instruction;
+// Index 1(InfoArrayIdxFAddInst): ADD instruction assoaicted with FMA;
+// Index 2(InfoArrayIdxFMULInst): MUL instruction assoaicted with FMA;
+// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in the FMA operand list;
+// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in the FMA operand
+//                                list;
+//                                second MUL operand index is plus 1.
+static const uint16_t FMAOpIdxInfo[][5] = {
+    // FIXME: add more FMA instructions like XSNMADDADP and so on.
+    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2},
+    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2},
+    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2},
+    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2},
+    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1},
+    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1},
+    {PPC::QVFMADDSs, PPC::QVFADDSs, PPC::QVFMULSs, 3, 1},
+    {PPC::QVFMADD, PPC::QVFADD, PPC::QVFMUL, 3, 1}};
+
+// Check if an opcode is a FMA instruction. If it is, return the index in array
+// FMAOpIdxInfo. Otherwise, return -1.
+int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
+  for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++)
+    if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
+      return I;
+  return -1;
+}
+
+// Try to reassociate FMA chains like below:
+//
+// Pattern 1:
+//   A =  FADD X,  Y          (Leaf)
+//   B =  FMA  A,  M21,  M22  (Prev)
+//   C =  FMA  B,  M31,  M32  (Root)
+// -->
+//   A =  FMA  X,  M21,  M22
+//   B =  FMA  Y,  M31,  M32
+//   C =  FADD A,  B
+//
+// Pattern 2:
+//   A =  FMA  X,  M11,  M12  (Leaf)
+//   B =  FMA  A,  M21,  M22  (Prev)
+//   C =  FMA  B,  M31,  M32  (Root)
+// -->
+//   A =  FMUL M11,  M12
+//   B =  FMA  X,  M21,  M22
+//   D =  FMA  A,  M31,  M32
+//   C =  FADD B,  D
+//
+// breaking the dependency between A and B, allowing FMA to be executed in
+// parallel (or back-to-back in a pipeline) instead of depending on each other.
+bool PPCInstrInfo::getFMAPatterns(
+    MachineInstr &Root,
+    SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+  MachineBasicBlock *MBB = Root.getParent();
+  const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+  auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
+    for (const auto &MO : Instr.explicit_operands())
+      if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+        return false;
+    return true;
+  };
+
+  auto IsReassociable = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
+                            bool IsLeaf, bool IsAdd) {
+    int16_t Idx = -1;
+    if (!IsAdd) {
+      Idx = getFMAOpIdxInfo(Instr.getOpcode());
+      if (Idx < 0)
+        return false;
+    } else if (Instr.getOpcode() !=
+               FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())]
+                           [InfoArrayIdxFAddInst])
+      return false;
+
+    // Instruction can be reassociated.
+    // fast match flags may prohibit reassociation.
+    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
+      return false;
+
+    // Instruction operands are virtual registers for reassociating.
+    if (!IsAllOpsVirtualReg(Instr))
+      return false;
+
+    if (IsAdd && IsLeaf)
+      return true;
+
+    AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+
+    const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
+    MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg());
+    // If 'add' operand's def is not in current block, don't do ILP related opt.
+    if (!MIAdd || MIAdd->getParent() != MBB)
+      return false;
+
+    // If this is not Leaf FMA Instr, its 'add' operand should only have one use
+    // as this fma will be changed later.
+    return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg());
+  };
+
+  int16_t AddOpIdx = -1;
+  // Root must be a valid FMA like instruction.
+  if (!IsReassociable(Root, AddOpIdx, false, false))
+    return false;
+
+  assert((AddOpIdx >= 0) && "add operand index not right!");
+
+  Register RegB = Root.getOperand(AddOpIdx).getReg();
+  MachineInstr *Prev = MRI.getUniqueVRegDef(RegB);
+
+  // Prev must be a valid FMA like instruction.
+  AddOpIdx = -1;
+  if (!IsReassociable(*Prev, AddOpIdx, false, false))
+    return false;
+
+  assert((AddOpIdx >= 0) && "add operand index not right!");
+
+  Register RegA = Prev->getOperand(AddOpIdx).getReg();
+  MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA);
+  AddOpIdx = -1;
+  if (IsReassociable(*Leaf, AddOpIdx, true, false)) {
+    Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
+    return true;
+  }
+  if (IsReassociable(*Leaf, AddOpIdx, true, true)) {
+    Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
+    return true;
+  }
+  return false;
+}
+
 bool PPCInstrInfo::getMachineCombinerPatterns(
     MachineInstr &Root,
     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
@@ -288,9 +426,198 @@ bool PPCInstrInfo::getMachineCombinerPatterns(
   if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
     return false;
 
+  if (getFMAPatterns(Root, Patterns))
+    return true;
+
   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
 }
 
+void PPCInstrInfo::genAlternativeCodeSequence(
+    MachineInstr &Root, MachineCombinerPattern Pattern,
+    SmallVectorImpl<MachineInstr *> &InsInstrs,
+    SmallVectorImpl<MachineInstr *> &DelInstrs,
+    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+  switch (Pattern) {
+  case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
+  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+    reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
+    break;
+  default:
+    // Reassociate default patterns.
+    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+                                                DelInstrs, InstrIdxForVirtReg);
+    break;
+  }
+}
+
+// Currently, only handle two patterns REASSOC_XY_AMM_BMM and
+// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns.
+void PPCInstrInfo::reassociateFMA(
+    MachineInstr &Root, MachineCombinerPattern Pattern,
+    SmallVectorImpl<MachineInstr *> &InsInstrs,
+    SmallVectorImpl<MachineInstr *> &DelInstrs,
+    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+  MachineFunction *MF = Root.getMF();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  MachineOperand &OpC = Root.getOperand(0);
+  Register RegC = OpC.getReg();
+  const TargetRegisterClass *RC = MRI.getRegClass(RegC);
+  MRI.constrainRegClass(RegC, RC);
+
+  unsigned FmaOp = Root.getOpcode();
+  int16_t Idx = getFMAOpIdxInfo(FmaOp);
+  assert(Idx >= 0 && "Root must be a FMA instruction");
+
+  uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
+  uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
+  MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
+  MachineInstr *Leaf =
+      MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
+  uint16_t IntersectedFlags =
+      Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
+
+  auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
+                            bool &KillFlag) {
+    Reg = Operand.getReg();
+    MRI.constrainRegClass(Reg, RC);
+    KillFlag = Operand.isKill();
+  };
+
+  auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
+                             Register &MulOp2, bool &MulOp1KillFlag,
+                             bool &MulOp2KillFlag) {
+    GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
+    GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
+  };
+
+  Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32;
+  bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
+       KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false;
+
+  GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32);
+  GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22);
+
+  if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+    GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12);
+    GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
+  } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+    GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
+    GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
+  }
+
+  // Create new virtual registers for the new results instead of
+  // recycling legacy ones because the MachineCombiner's computation of the
+  // critical path requires a new register definition rather than an existing
+  // one.
+  Register NewVRA = MRI.createVirtualRegister(RC);
+  InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));
+
+  Register NewVRB = MRI.createVirtualRegister(RC);
+  InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
+
+  Register NewVRD = 0;
+  if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+    NewVRD = MRI.createVirtualRegister(RC);
+    InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
+  }
+
+  auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
+                                Register RegMul1, bool KillRegMul1,
+                                Register RegMul2, bool KillRegMul2) {
+    MI->getOperand(AddOpIdx).setReg(RegAdd);
+    MI->getOperand(AddOpIdx).setIsKill(KillAdd);
+    MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
+    MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
+    MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
+    MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
+  };
+
+  if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
+    // Create new instructions for insertion.
+    MachineInstrBuilder MINewB =
+        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+            .addReg(RegX, getKillRegState(KillX))
+            .addReg(RegM21, getKillRegState(KillM21))
+            .addReg(RegM22, getKillRegState(KillM22));
+    MachineInstrBuilder MINewA =
+        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
+            .addReg(RegY, getKillRegState(KillY))
+            .addReg(RegM31, getKillRegState(KillM31))
+            .addReg(RegM32, getKillRegState(KillM32));
+    // if AddOpIdx is not 1, adjust the order.
+    if (AddOpIdx != 1) {
+      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+      AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
+    }
+
+    MachineInstrBuilder MINewC =
+        BuildMI(*MF, Root.getDebugLoc(),
+                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+            .addReg(NewVRB, getKillRegState(true))
+            .addReg(NewVRA, getKillRegState(true));
+
+    // update flags for new created instructions.
+    setSpecialOperandAttr(*MINewA, IntersectedFlags);
+    setSpecialOperandAttr(*MINewB, IntersectedFlags);
+    setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+    // Record new instructions for insertion.
+    InsInstrs.push_back(MINewA);
+    InsInstrs.push_back(MINewB);
+    InsInstrs.push_back(MINewC);
+  } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
+    assert(NewVRD && "new FMA register not created!");
+    // Create new instructions for insertion.
+    MachineInstrBuilder MINewA =
+        BuildMI(*MF, Leaf->getDebugLoc(),
+                get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
+            .addReg(RegM11, getKillRegState(KillM11))
+            .addReg(RegM12, getKillRegState(KillM12));
+    MachineInstrBuilder MINewB =
+        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
+            .addReg(RegX, getKillRegState(KillX))
+            .addReg(RegM21, getKillRegState(KillM21))
+            .addReg(RegM22, getKillRegState(KillM22));
+    MachineInstrBuilder MINewD =
+        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
+            .addReg(NewVRA, getKillRegState(true))
+            .addReg(RegM31, getKillRegState(KillM31))
+            .addReg(RegM32, getKillRegState(KillM32));
+    // If AddOpIdx is not 1, adjust the order.
+    if (AddOpIdx != 1) {
+      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
+      AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
+                         KillM32);
+    }
+
+    MachineInstrBuilder MINewC =
+        BuildMI(*MF, Root.getDebugLoc(),
+                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
+            .addReg(NewVRB, getKillRegState(true))
+            .addReg(NewVRD, getKillRegState(true));
+
+    // update flags for new created instructions.
+    setSpecialOperandAttr(*MINewA, IntersectedFlags);
+    setSpecialOperandAttr(*MINewB, IntersectedFlags);
+    setSpecialOperandAttr(*MINewD, IntersectedFlags);
+    setSpecialOperandAttr(*MINewC, IntersectedFlags);
+
+    // Record new instructions for insertion.
+    InsInstrs.push_back(MINewA);
+    InsInstrs.push_back(MINewB);
+    InsInstrs.push_back(MINewD);
+    InsInstrs.push_back(MINewC);
+  }
+
+  assert(!InsInstrs.empty() &&
+         "Insertion instructions set should not be empty!");
+
+  // Record old instructions for deletion.
+  DelInstrs.push_back(Leaf);
+  DelInstrs.push_back(Prev);
+  DelInstrs.push_back(&Root);
+}
+
 // Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
 bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                          Register &SrcReg, Register &DstReg,

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 3d6f7f1af346..ca30ced9d404 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -229,6 +229,11 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   unsigned getSpillTarget() const;
   const unsigned *getStoreOpcodesForSpillArray() const;
   const unsigned *getLoadOpcodesForSpillArray() const;
+  int16_t getFMAOpIdxInfo(unsigned Opcode) const;
+  void reassociateFMA(MachineInstr &Root, MachineCombinerPattern Pattern,
+                      SmallVectorImpl<MachineInstr *> &InsInstrs,
+                      SmallVectorImpl<MachineInstr *> &DelInstrs,
+                      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
   virtual void anchor();
 
 protected:
@@ -308,6 +313,20 @@ class PPCInstrInfo : public PPCGenInstrInfo {
     return true;
   }
 
+  /// When getMachineCombinerPatterns() finds patterns, this function generates
+  /// the instructions that could replace the original code sequence
+  void genAlternativeCodeSequence(
+      MachineInstr &Root, MachineCombinerPattern Pattern,
+      SmallVectorImpl<MachineInstr *> &InsInstrs,
+      SmallVectorImpl<MachineInstr *> &DelInstrs,
+      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+  /// Return true when there is potentially a faster code sequence for a fma
+  /// chain ending in \p Root. All potential patterns are output in the \p
+  /// P array.
+  bool getFMAPatterns(MachineInstr &Root,
+                      SmallVectorImpl<MachineCombinerPattern> &P) const;
+
   /// Return true when there is potentially a faster code sequence
   /// for an instruction chain ending in <Root>. All potential patterns are
   /// output in the <Pattern> array.
@@ -317,6 +336,16 @@ class PPCInstrInfo : public PPCGenInstrInfo {
 
   bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
 
+  /// On PowerPC, we try to reassociate FMA chain which will increase
+  /// instruction size. Set extension resource length limit to 1 for edge case.
+  /// Resource Length is calculated by scaled resource usage in getCycles().
+  /// Because of the division in getCycles(), it returns 
diff erent cycles due to
+  /// legacy scaled resource usage. So new resource length may be same with
+  /// legacy or 1 bigger than legacy.
+  /// We need to execlude the 1 bigger case even the resource length is not
+  /// perserved for more FMA chain reassociations on PowerPC.
+  int getExtendResourceLenLimit() const override { return 1; }
+
   void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
                              MachineInstr &NewMI1,
                              MachineInstr &NewMI2) const override;

diff  --git a/llvm/test/CodeGen/PowerPC/machine-combiner.ll b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
index 7206d5e4dfaa..118d1da969b6 100644
--- a/llvm/test/CodeGen/PowerPC/machine-combiner.ll
+++ b/llvm/test/CodeGen/PowerPC/machine-combiner.ll
@@ -217,12 +217,12 @@ define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
 define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
 ; CHECK-LABEL: reassociate_mamaa_double:
 ; CHECK:       # %bb.0:
-; CHECK-QPX:   fadd 0, 2, 1
-; CHECK-QPX:   fmadd 0, 4, 3, 0
-; CHECK-QPX:   fmadd 1, 6, 5, 0
-; CHECK-PWR:   xsadddp 1, 2, 1
-; CHECK-PWR:   xsmaddadp 1, 4, 3
-; CHECK-PWR:   xsmaddadp 1, 6, 5
+; CHECK-QPX-DAG:   fmadd [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-QPX-DAG:   fmadd [[REG1:[0-9]+]], 6, 5, 1
+; CHECK-QPX:       fadd 1, [[REG0]], [[REG1]]
+; CHECK-PWR-DAG:   xsmaddadp 1, 6, 5
+; CHECK-PWR-DAG:   xsmaddadp 2, 4, 3
+; CHECK-PWR:       xsadddp 1, 2, 1
 ; CHECK-NEXT:  blr
   %7 = fmul reassoc nsz double %3, %2
   %8 = fmul reassoc nsz double %5, %4
@@ -235,10 +235,10 @@ define double @reassociate_mamaa_double(double %0, double %1, double %2, double
 ; FIXME: should use xsmaddasp instead of fmadds for pwr7 arch.
 define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, float %4, float %5) {
 ; CHECK-LABEL: reassociate_mamaa_float:
-; CHECK:   # %bb.0:
-; CHECK:   fadds 0, 2, 1
-; CHECK:   fmadds 0, 4, 3, 0
-; CHECK:   fmadds 1, 6, 5, 0
+; CHECK:       # %bb.0:
+; CHECK-DAG:   fmadds [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-DAG:   fmadds [[REG1:[0-9]+]], 6, 5, 1
+; CHECK:       fadds 1, [[REG0]], [[REG1]]
 ; CHECK-NEXT:  blr
   %7 = fmul reassoc nsz float %3, %2
   %8 = fmul reassoc nsz float %5, %4
@@ -251,12 +251,12 @@ define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, fl
 define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
 ; CHECK-LABEL: reassociate_mamaa_vec:
 ; CHECK:       # %bb.0:
-; CHECK-QPX:   qvfadds 0, 2, 1
-; CHECK-QPX:   qvfmadds 0, 4, 3, 0
-; CHECK-QPX:   qvfmadds 1, 6, 5, 0
-; CHECK-PWR:   xvaddsp 34, 35, 34
-; CHECK-PWR:   xvmaddasp 34, 37, 36
-; CHECK-PWR:   xvmaddasp 34, 39, 38
+; CHECK-QPX-DAG:   qvfmadds [[REG0:[0-9]+]], 4, 3, 2
+; CHECK-QPX-DAG:   qvfmadds [[REG1:[0-9]+]], 6, 5, 1
+; CHECK-QPX:       qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR-DAG:   xvmaddasp [[REG0:[0-9]+]], 39, 38
+; CHECK-PWR-DAG:   xvmaddasp [[REG1:[0-9]+]], 37, 36
+; CHECK-PWR:       xvaddsp 34, [[REG1]], [[REG0]]
 ; CHECK-NEXT:  blr
   %7 = fmul reassoc nsz <4 x float> %3, %2
   %8 = fmul reassoc nsz <4 x float> %5, %4
@@ -269,15 +269,16 @@ define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x f
 define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
 ; CHECK-LABEL: reassociate_mamama_double:
 ; CHECK:       # %bb.0:
-; CHECK-QPX:       fmadd 0, 2, 1, 7
-; CHECK-QPX-DAG:   fmadd 0, 4, 3, 0
-; CHECK-QPX-DAG:   fmadd 0, 6, 5, 0
-; CHECK-QPX:       fmadd 1, 9, 8, 0
+; CHECK-QPX:       fmadd [[REG0:[0-9]+]], 2, 1, 7
+; CHECK-QPX-DAG:   fmul [[REG1:[0-9]+]], 4, 3
+; CHECK-QPX-DAG:   fmadd [[REG2:[0-9]+]], 6, 5, [[REG0]]
+; CHECK-QPX-DAG:   fmadd [[REG3:[0-9]+]], 9, 8, [[REG1]]
+; CHECK-QPX:       fadd 1, [[REG2]], [[REG3]]
 ; CHECK-PWR:       xsmaddadp 7, 2, 1
-; CHECK-PWR-DAG:   xsmaddadp 7, 4, 3
+; CHECK-PWR-DAG:   xsmuldp [[REG0:[0-9]+]], 4, 3
 ; CHECK-PWR-DAG:   xsmaddadp 7, 6, 5
-; CHECK-PWR-DAG:   xsmaddadp 7, 9, 8
-; CHECK-PWR:       fmr 1, 7
+; CHECK-PWR-DAG:   xsmaddadp [[REG0]], 9, 8
+; CHECK-PWR:       xsadddp 1, 7, [[REG0]]
 ; CHECK-NEXT:  blr
   %10 = fmul reassoc nsz double %1, %0
   %11 = fmul reassoc nsz double %3, %2
@@ -295,16 +296,18 @@ define dso_local float @reassociate_mamama_8(float %0, float %1, float %2, float
                                              float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16) {
 ; CHECK-LABEL: reassociate_mamama_8:
 ; CHECK:       # %bb.0:
-; CHECK:       fmadds [[REG0:[0-9]+]], 3, 2, 1
-; CHECK-DAG:   fmadds [[REG0]], 5, 4, [[REG0]]
-; CHECK-DAG:   fmadds [[REG0]], 7, 6, [[REG0]]
-; CHECK-DAG:   fmadds [[REG0]], 9, 8, [[REG0]]
-; CHECK-DAG:   fmadds [[REG0]], 13, 12, [[REG0]]
-; CHECK-DAG:   fmadds [[REG0]], 11, 10, [[REG0]]
+; CHECK-DAG:    fmadds [[REG0:[0-9]+]], 3, 2, 1
+; CHECK-DAG:    fmuls  [[REG1:[0-9]+]], 5, 4
+; CHECK-DAG:    fmadds [[REG2:[0-9]+]], 7, 6, [[REG0]]
+; CHECK-DAG:    fmadds [[REG3:[0-9]+]], 9, 8, [[REG1]]
 ;
-; CHECK:       fmadds [[REG0]],
-; CHECK:       fmadds 1,
-; CHECK-NEXT:  blr
+; CHECK-DAG:    fmadds [[REG4:[0-9]+]], 13, 12, [[REG3]]
+; CHECK-DAG:    fmadds [[REG5:[0-9]+]], 11, 10, [[REG2]]
+;
+; CHECK-DAG:    fmadds [[REG6:[0-9]+]], 3, 2, [[REG4]]
+; CHECK-DAG:    fmadds [[REG7:[0-9]+]], 5, 4, [[REG5]]
+; CHECK:        fadds 1, [[REG7]], [[REG6]]
+; CHECK-NEXT:   blr
   %18 = fmul reassoc nsz float %2, %1
   %19 = fadd reassoc nsz float %18, %0
   %20 = fmul reassoc nsz float %4, %3


        


More information about the llvm-commits mailing list