[llvm] 2693efa - [MachineCombiner] Support local strategy for traces

Anton Sidorenko via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 17 02:18:04 PST 2023


Author: Anton Sidorenko
Date: 2023-02-17T13:17:22+03:00
New Revision: 2693efa8a5bcd5196264c54f36f7fe6df6799420

URL: https://github.com/llvm/llvm-project/commit/2693efa8a5bcd5196264c54f36f7fe6df6799420
DIFF: https://github.com/llvm/llvm-project/commit/2693efa8a5bcd5196264c54f36f7fe6df6799420.diff

LOG: [MachineCombiner] Support local strategy for traces

For in-order cores, MachineCombiner makes better decisions when the critical path
is calculated only for the current basic block and does not take into account
other blocks from the trace.

This patch adds a virtual method to TargetInstrInfo to allow each target to decide
which strategy to use.

Depends on D140541

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D140542

Added: 
    llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll

Modified: 
    llvm/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/lib/CodeGen/MachineCombiner.cpp
    llvm/lib/CodeGen/TargetInstrInfo.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.h
    llvm/test/CodeGen/RISCV/machine-combiner.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8185751f56cc7..92c283a29c36b 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -61,6 +61,7 @@ class TargetRegisterInfo;
 class TargetSchedModel;
 class TargetSubtargetInfo;
 enum class MachineCombinerPattern;
+enum class MachineTraceStrategy;
 
 template <class T> class SmallVectorImpl;
 
@@ -1251,6 +1252,9 @@ class TargetInstrInfo : public MCInstrInfo {
   /// Return true when a target supports MachineCombiner.
   virtual bool useMachineCombiner() const { return false; }
 
+  /// Return a strategy that MachineCombiner must use when creating traces.
+  virtual MachineTraceStrategy getMachineCombinerTraceStrategy() const;
+
   /// Return true if the given SDNode can be copied during scheduling
   /// even if it has glue.
   virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; }

diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 1ae558bd27b82..fd02d1b37a358 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -95,7 +95,8 @@ class MachineCombiner : public MachineFunctionPass {
   bool isTransientMI(const MachineInstr *MI);
   unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                    MachineTraceMetrics::Trace BlockTrace);
+                    MachineTraceMetrics::Trace BlockTrace,
+                    const MachineBasicBlock &MBB);
   unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                       MachineTraceMetrics::Trace BlockTrace);
   bool
@@ -207,7 +208,8 @@ bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
 unsigned
 MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                          MachineTraceMetrics::Trace BlockTrace) {
+                          MachineTraceMetrics::Trace BlockTrace,
+                          const MachineBasicBlock &MBB) {
   SmallVector<unsigned, 16> InstrDepth;
   // For each instruction in the new sequence compute the depth based on the
   // operands. Use the trace information when possible. For new operands which
@@ -237,7 +239,9 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                                                       InstrPtr, UseIdx);
       } else {
         MachineInstr *DefInstr = getOperandDef(MO);
-        if (DefInstr) {
+        if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
+                             MachineTraceStrategy::TS_Local ||
+                         DefInstr->getParent() == &MBB)) {
           DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
           if (!isTransientMI(DefInstr))
             LatencyOp = TSchedModel.computeOperandLatency(
@@ -374,7 +378,8 @@ bool MachineCombiner::improvesCriticalPathLen(
     MachineCombinerPattern Pattern,
     bool SlackIsAccurate) {
   // Get depth and latency of NewRoot and Root.
-  unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+  unsigned NewRootDepth =
+      getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
   unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
 
   LLVM_DEBUG(dbgs() << "  Dependence data for " << *Root << "\tNewRootDepth: "
@@ -574,7 +579,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
   // Check if the block is in a loop.
   const MachineLoop *ML = MLI->getLoopFor(MBB);
   if (!TraceEnsemble)
-    TraceEnsemble = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
+    TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
 
   SparseSet<LiveRegUnit> RegUnits;
   RegUnits.setUniverse(TRI->getNumRegUnits());

diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index a90258980a081..e86724ad7c0f0 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/StackMaps.h"
@@ -1049,6 +1050,10 @@ void TargetInstrInfo::genAlternativeCodeSequence(
   reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
 }
 
+MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
+  return MachineTraceStrategy::TS_MinInstrCount;
+}
+
 bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
     const MachineInstr &MI) const {
   const MachineFunction &MF = *MI.getMF();

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 472492b6edf2f..ee50d35f6fe7d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/MC/MCInstBuilder.h"
@@ -44,6 +45,16 @@ static cl::opt<bool> PreferWholeRegisterMove(
     "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
     cl::desc("Prefer whole register move for vector registers."));
 
+static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
+    "riscv-force-machine-combiner-strategy", cl::Hidden,
+    cl::desc("Force machine combiner to use a specific strategy for machine "
+             "trace metrics evaluation."),
+    cl::init(MachineTraceStrategy::TS_NumStrategies),
+    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
+                          "Local strategy."),
+               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
+                          "MinInstrCount strategy.")));
+
 namespace llvm::RISCVVPseudosTable {
 
 using namespace RISCV;
@@ -1257,6 +1268,20 @@ RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
   return std::nullopt;
 }
 
+MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
+  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
+    // The option is unused. Choose Local strategy only for in-order cores. When
+    // scheduling model is unspecified, use MinInstrCount strategy as more
+    // generic one.
+    const auto &SchedModel = STI.getSchedModel();
+    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
+               ? MachineTraceStrategy::TS_MinInstrCount
+               : MachineTraceStrategy::TS_Local;
+  }
+  // The strategy was forced by the option.
+  return ForceMachineCombinerStrategy;
+}
+
 void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
                                            MachineInstr &OldMI2,
                                            MachineInstr &NewMI1,

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index e2e5c424cb3a3..515a0071b2b62 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -195,6 +195,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   bool useMachineCombiner() const override { return true; }
 
+  MachineTraceStrategy getMachineCombinerTraceStrategy() const override;
+
   void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
                              MachineInstr &NewMI1,
                              MachineInstr &NewMI2) const override;

diff --git a/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll b/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll
new file mode 100644
index 0000000000000..a50c6cab46a32
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mcpu=syntacore-scr1-max \
+; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=local < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK_SCR1,CHECK_LOCAL_SCR1
+
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mcpu=syntacore-scr1-max \
+; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=min-instr < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK_SCR1,CHECK_GLOBAL_SCR1
+
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mcpu=sifive-u74 \
+; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=local < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK_SIFIVE_U74,CHECK_LOCAL_SIFIVE_U74
+
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mcpu=sifive-u74 \
+; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=min-instr < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK_SIFIVE_U74,CHECK_GLOBAL_SIFIVE_U74
+
+define i32 @test_local_strategy(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK_LOCAL_SCR1-LABEL: test_local_strategy:
+; CHECK_LOCAL_SCR1:       # %bb.0: # %entry
+; CHECK_LOCAL_SCR1-NEXT:    div a0, a0, a1
+; CHECK_LOCAL_SCR1-NEXT:    sub a0, a0, a2
+; CHECK_LOCAL_SCR1-NEXT:    beqz a0, .LBB0_2
+; CHECK_LOCAL_SCR1-NEXT:  # %bb.1: # %b2
+; CHECK_LOCAL_SCR1-NEXT:    ret
+; CHECK_LOCAL_SCR1-NEXT:  .LBB0_2: # %b1
+; CHECK_LOCAL_SCR1-NEXT:    add a3, a3, a4
+; CHECK_LOCAL_SCR1-NEXT:    add a0, a0, a5
+; CHECK_LOCAL_SCR1-NEXT:    add a0, a0, a3
+; CHECK_LOCAL_SCR1-NEXT:    ret
+;
+; CHECK_GLOBAL_SCR1-LABEL: test_local_strategy:
+; CHECK_GLOBAL_SCR1:       # %bb.0: # %entry
+; CHECK_GLOBAL_SCR1-NEXT:    div a0, a0, a1
+; CHECK_GLOBAL_SCR1-NEXT:    sub a0, a0, a2
+; CHECK_GLOBAL_SCR1-NEXT:    beqz a0, .LBB0_2
+; CHECK_GLOBAL_SCR1-NEXT:  # %bb.1: # %b2
+; CHECK_GLOBAL_SCR1-NEXT:    ret
+; CHECK_GLOBAL_SCR1-NEXT:  .LBB0_2: # %b1
+; CHECK_GLOBAL_SCR1-NEXT:    add a3, a3, a4
+; CHECK_GLOBAL_SCR1-NEXT:    add a3, a3, a5
+; CHECK_GLOBAL_SCR1-NEXT:    add a0, a0, a3
+; CHECK_GLOBAL_SCR1-NEXT:    ret
+;
+; CHECK_LOCAL_SIFIVE_U74-LABEL: test_local_strategy:
+; CHECK_LOCAL_SIFIVE_U74:       # %bb.0: # %entry
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    divw a0, a0, a1
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    subw a0, a0, a2
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    beqz a0, .LBB0_2
+; CHECK_LOCAL_SIFIVE_U74-NEXT:  # %bb.1: # %b2
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    ret
+; CHECK_LOCAL_SIFIVE_U74-NEXT:  .LBB0_2: # %b1
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    add a3, a3, a4
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    add a0, a0, a5
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    addw a0, a0, a3
+; CHECK_LOCAL_SIFIVE_U74-NEXT:    ret
+;
+; CHECK_GLOBAL_SIFIVE_U74-LABEL: test_local_strategy:
+; CHECK_GLOBAL_SIFIVE_U74:       # %bb.0: # %entry
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    divw a0, a0, a1
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    subw a0, a0, a2
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    beqz a0, .LBB0_2
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:  # %bb.1: # %b2
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    ret
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:  .LBB0_2: # %b1
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    add a3, a3, a4
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    add a3, a3, a5
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    addw a0, a0, a3
+; CHECK_GLOBAL_SIFIVE_U74-NEXT:    ret
+entry:
+  %div = sdiv i32 %a0, %a1
+  %sub0 = sub i32 %div, %a2
+  %cmp = icmp eq i32 %sub0, 0
+  br i1 %cmp, label %b1, label %b2
+b1:
+  %sub1 = add i32 %a3, %a4
+  %sub2 = add i32 %a5, %sub1
+  %sub3 = add i32 %sub2, %sub0
+  ret i32 %sub3
+b2:
+  ret i32 %sub0
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK_SCR1: {{.*}}
+; CHECK_SIFIVE_U74: {{.*}}

diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 3f20b62cfddba..1cb57ff307310 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1,7 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zbb,+zfh -verify-machineinstrs -mcpu=sifive-u74 \
-; RUN: -O1 -riscv-enable-machine-combiner=true < %s | \
-; RUN: FileCheck %s
+; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=local < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK_LOCAL
+
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zbb,+zfh -verify-machineinstrs -mcpu=sifive-u74 \
+; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=min-instr < %s | \
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK_GLOBAL
 
 define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3) {
 ; CHECK-LABEL: test_reassoc_fadd1:
@@ -1088,3 +1092,43 @@ declare double @llvm.minnum.f64(double, double)
 declare half @llvm.maxnum.f16(half, half)
 declare float @llvm.maxnum.f32(float, float)
 declare double @llvm.maxnum.f64(double, double)
+
+define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
+; CHECK_LOCAL-LABEL: test_fmadd_strategy:
+; CHECK_LOCAL:       # %bb.0: # %entry
+; CHECK_LOCAL-NEXT:    fmv.d ft0, fa0
+; CHECK_LOCAL-NEXT:    fsub.d ft1, fa0, fa1
+; CHECK_LOCAL-NEXT:    fmul.d fa0, ft1, fa2
+; CHECK_LOCAL-NEXT:    andi a0, a0, 1
+; CHECK_LOCAL-NEXT:    beqz a0, .LBB76_2
+; CHECK_LOCAL-NEXT:  # %bb.1: # %entry
+; CHECK_LOCAL-NEXT:    fmul.d ft1, ft0, fa1
+; CHECK_LOCAL-NEXT:    fmadd.d ft0, ft0, fa1, fa0
+; CHECK_LOCAL-NEXT:    fsub.d fa0, ft0, ft1
+; CHECK_LOCAL-NEXT:  .LBB76_2: # %entry
+; CHECK_LOCAL-NEXT:    ret
+;
+; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
+; CHECK_GLOBAL:       # %bb.0: # %entry
+; CHECK_GLOBAL-NEXT:    fmv.d ft0, fa0
+; CHECK_GLOBAL-NEXT:    fsub.d ft1, fa0, fa1
+; CHECK_GLOBAL-NEXT:    fmul.d fa0, ft1, fa2
+; CHECK_GLOBAL-NEXT:    andi a0, a0, 1
+; CHECK_GLOBAL-NEXT:    beqz a0, .LBB76_2
+; CHECK_GLOBAL-NEXT:  # %bb.1: # %entry
+; CHECK_GLOBAL-NEXT:    fmul.d ft0, ft0, fa1
+; CHECK_GLOBAL-NEXT:    fadd.d ft1, ft0, fa0
+; CHECK_GLOBAL-NEXT:    fsub.d fa0, ft1, ft0
+; CHECK_GLOBAL-NEXT:  .LBB76_2: # %entry
+; CHECK_GLOBAL-NEXT:    ret
+entry:
+  %sub = fsub contract double %a0, %a1
+  %mul = fmul contract double %sub, %a2
+  %and = and i64 %flag, 1
+  %tobool.not = icmp eq i64 %and, 0
+  %mul2 = fmul contract double %a0, %a1
+  %add = fadd contract double %mul2, %mul
+  %sub3 = fsub contract double %add, %mul2
+  %retval.0 = select i1 %tobool.not, double %mul, double %sub3
+  ret double %retval.0
+}


        


More information about the llvm-commits mailing list