[llvm] [AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: use BranchProbability and TargetSchedmodel (PR #109818)

Wed Sep 25 04:15:55 PDT 2024

Juan Manuel Martinez Caamaño <juamarti at amd.com>,
Juan Manuel Martinez Caamaño <juamarti at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/109818 at github.com>


================
@@ -304,11 +307,95 @@ bool SIPreEmitPeephole::getBlockDestinations(
   return true;
 }
 
-bool SIPreEmitPeephole::mustRetainExeczBranch(
-    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+namespace {
+class CostModelBase {
+public:
+  virtual bool isProfitable(const MachineInstr &MI) = 0;
+  virtual ~CostModelBase() = default;
+  static std::unique_ptr<CostModelBase> Create(const MachineBasicBlock &MBB,
+                                               const MachineBasicBlock &,
+                                               const SIInstrInfo &TII);
+};
+
+class TrivialCostModel : public CostModelBase {
+  friend CostModelBase;
+
   unsigned NumInstr = 0;
-  const MachineFunction *MF = From.getParent();
+  const SIInstrInfo &TII;
+
+  TrivialCostModel(const SIInstrInfo &TII) : TII(TII) {}
+
+public:
+  bool isProfitable(const MachineInstr &MI) override {
+    ++NumInstr;
+    if (NumInstr >= SkipThreshold)
+      return false;
+    // These instructions are potentially expensive even if EXEC = 0.
+    if (TII.isSMRD(MI) || TII.isVMEM(MI) || TII.isFLAT(MI) || TII.isDS(MI) ||
+        TII.isWaitcnt(MI.getOpcode()))
+      return false;
+    return true;
+  }
+  ~TrivialCostModel() override = default;
+};
+
+class BranchWeightCostModel : public CostModelBase {
+  friend CostModelBase;
+
+  unsigned long ExecNZBranchCost;
+  unsigned long UnconditionalBranchCost;
+  unsigned long N;
+  unsigned long D;
+  unsigned long ThenCyclesCost = 0;
+  const TargetSchedModel &SchedModel;
+
+  BranchWeightCostModel(const BranchProbability &BP,
+                        const TargetSchedModel &SchedModel)
+      : SchedModel(SchedModel) {
+    assert(!BP.isUnknown());
+    assert(SchedModel.hasInstrSchedModel());
+    ExecNZBranchCost = SchedModel.computeInstrLatency(AMDGPU::S_CBRANCH_EXECZ);
+    UnconditionalBranchCost = SchedModel.computeInstrLatency(AMDGPU::S_BRANCH);
+    N = BP.getNumerator();
+    D = BP.getDenominator();
+  }
 
+public:
+  bool isProfitable(const MachineInstr &MI) override {
+    ThenCyclesCost += SchedModel.computeInstrLatency(&MI, false);
+
+    // Consider `P = N/D` to be the probability of execnz being true
+    // The transformation is profitable if always executing the 'then' block
+    // is cheaper than executing sometimes 'then', s_branch and always
+    // executing s_cbranch_execnz
+    return (D - N) * ThenCyclesCost <=
+           D * ExecNZBranchCost + (D - N) * UnconditionalBranchCost;
+  }
+  ~BranchWeightCostModel() override = default;
+};
+
+std::unique_ptr<CostModelBase>
+CostModelBase::Create(const MachineBasicBlock &Head,
+                      const MachineBasicBlock &Succ, const SIInstrInfo &TII) {
+  const auto *FromIt = find(Head.successors(), &Succ);
+  assert(FromIt != Head.succ_end());
+  BranchProbability ExecNZProb = Head.getSuccProbability(FromIt);
+
+  auto &SchedModel = TII.getSchedModel();
+  if (SchedModel.hasInstrSchedModel() && !ExecNZProb.isUnknown())
+    return std::unique_ptr<CostModelBase>(
+        new BranchWeightCostModel(ExecNZProb, SchedModel));
+
+  return std::unique_ptr<CostModelBase>(new TrivialCostModel(TII));
----------------
arsenm wrote:

I don't see why these need heap allocation.

https://github.com/llvm/llvm-project/pull/109818