[llvm] [AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: use BranchProbability and TargetSchedmodel (PR #109818)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 04:15:55 PDT 2024
Juan Manuel Martinez Caamaño <juamarti at amd.com>,
Juan Manuel Martinez Caamaño <juamarti at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/109818 at github.com>
================
@@ -304,11 +307,95 @@ bool SIPreEmitPeephole::getBlockDestinations(
return true;
}
-bool SIPreEmitPeephole::mustRetainExeczBranch(
- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+namespace {
+class CostModelBase {
+public:
+ virtual bool isProfitable(const MachineInstr &MI) = 0;
+ virtual ~CostModelBase() = default;
+ static std::unique_ptr<CostModelBase> Create(const MachineBasicBlock &MBB,
+ const MachineBasicBlock &,
+ const SIInstrInfo &TII);
+};
+
+class TrivialCostModel : public CostModelBase {
+ friend CostModelBase;
+
unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
+ const SIInstrInfo &TII;
+
+ TrivialCostModel(const SIInstrInfo &TII) : TII(TII) {}
+
+public:
+ bool isProfitable(const MachineInstr &MI) override {
+ ++NumInstr;
+ if (NumInstr >= SkipThreshold)
+ return false;
+ // These instructions are potentially expensive even if EXEC = 0.
+ if (TII.isSMRD(MI) || TII.isVMEM(MI) || TII.isFLAT(MI) || TII.isDS(MI) ||
+ TII.isWaitcnt(MI.getOpcode()))
+ return false;
+ return true;
+ }
+ ~TrivialCostModel() override = default;
+};
+
+class BranchWeightCostModel : public CostModelBase {
+ friend CostModelBase;
+
+ unsigned long ExecNZBranchCost;
+ unsigned long UnconditionalBranchCost;
+ unsigned long N;
+ unsigned long D;
+ unsigned long ThenCyclesCost = 0;
+ const TargetSchedModel &SchedModel;
+
+ BranchWeightCostModel(const BranchProbability &BP,
+ const TargetSchedModel &SchedModel)
+ : SchedModel(SchedModel) {
+ assert(!BP.isUnknown());
+ assert(SchedModel.hasInstrSchedModel());
+ ExecNZBranchCost = SchedModel.computeInstrLatency(AMDGPU::S_CBRANCH_EXECZ);
+ UnconditionalBranchCost = SchedModel.computeInstrLatency(AMDGPU::S_BRANCH);
+ N = BP.getNumerator();
+ D = BP.getDenominator();
+ }
+public:
+ bool isProfitable(const MachineInstr &MI) override {
+ ThenCyclesCost += SchedModel.computeInstrLatency(&MI, false);
+
+ // Consider `P = N/D` to be the probability of execnz being true
+ // The transformation is profitable if always executing the 'then' block
+ // is cheaper than executing sometimes 'then', s_branch and always
+ // executing s_cbranch_execnz
+ return (D - N) * ThenCyclesCost <=
+ D * ExecNZBranchCost + (D - N) * UnconditionalBranchCost;
+ }
+ ~BranchWeightCostModel() override = default;
+};
+
+std::unique_ptr<CostModelBase>
+CostModelBase::Create(const MachineBasicBlock &Head,
+ const MachineBasicBlock &Succ, const SIInstrInfo &TII) {
+ const auto *FromIt = find(Head.successors(), &Succ);
+ assert(FromIt != Head.succ_end());
+ BranchProbability ExecNZProb = Head.getSuccProbability(FromIt);
+
+ auto &SchedModel = TII.getSchedModel();
+ if (SchedModel.hasInstrSchedModel() && !ExecNZProb.isUnknown())
+ return std::unique_ptr<CostModelBase>(
+ new BranchWeightCostModel(ExecNZProb, SchedModel));
+
+ return std::unique_ptr<CostModelBase>(new TrivialCostModel(TII));
----------------
arsenm wrote:
I don't see why these need heap allocation.
https://github.com/llvm/llvm-project/pull/109818
More information about the llvm-commits
mailing list