[llvm] [AMDGPU] Create an AMDGPUIfConverter pass (PR #106415)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 09:40:02 PDT 2024


Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>,
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>,
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>,
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>,
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/106415 at github.com>


================
@@ -0,0 +1,279 @@
+#include <llvm/CodeGen/MachineBasicBlock.h>
+#include <llvm/CodeGen/MachineBranchProbabilityInfo.h>
+#include <llvm/CodeGen/MachineDominators.h>
+#include <llvm/CodeGen/MachineFunctionPass.h>
+#include <llvm/CodeGen/MachineLoopInfo.h>
+#include <llvm/CodeGen/SSAIfConv.h>
+#include <llvm/CodeGen/TargetInstrInfo.h>
+#include <llvm/CodeGen/TargetRegisterInfo.h>
+#include <llvm/CodeGen/TargetSchedule.h>
+#include <llvm/CodeGen/TargetSubtargetInfo.h>
+#include <llvm/InitializePasses.h>
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+namespace {
+#define DEBUG_TYPE "amdgpu-if-cvt"
+const char PassName[] = "AMDGPU if conversion";
+
+class AMDGPUIfConverter : public MachineFunctionPass {
+  const SIInstrInfo *TII = nullptr;
+  TargetSchedModel SchedModel;
+  MachineDominatorTree *DomTree = nullptr;
+  MachineBranchProbabilityInfo *MBPI = nullptr;
+  MachineLoopInfo *Loops = nullptr;
+
+  static constexpr unsigned BlockInstrLimit = 30;
+  static constexpr bool Stress = false;
+  SSAIfConv IfConv{DEBUG_TYPE, BlockInstrLimit, Stress};
+
+public:
+  static char ID;
+
+  AMDGPUIfConverter() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool tryConvertIf(MachineBasicBlock *);
+  bool shouldConvertIf();
+
+  StringRef getPassName() const override { return PassName; }
+};
+
+char AMDGPUIfConverter::ID = 0;
+
+void AMDGPUIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineDominatorTreeWrapperPass>();
+  AU.addPreserved<MachineDominatorTreeWrapperPass>();
+  AU.addRequired<MachineLoopInfoWrapperPass>();
+  AU.addPreserved<MachineLoopInfoWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  const auto &STI = MF.getSubtarget<GCNSubtarget>();
+  if (!STI.hasGFX10_3Insts())
+    return false;
+
+  TII = STI.getInstrInfo();
+  SchedModel.init(&STI);
+  DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+  Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+
+  bool Changed = false;
+  IfConv.runOnMachineFunction(MF);
+
+  for (auto *DomNode : post_order(DomTree))
+    if (tryConvertIf(DomNode->getBlock()))
+      Changed = true;
+
+  return Changed;
+}
+
+unsigned getReversedVCMPXOpcode(unsigned Opcode) {
+  // TODO: this is a placeholder for the real function
+  switch (Opcode) {
+  case AMDGPU::V_CMPX_LT_I32_nosdst_e64:
+    return AMDGPU::V_CMPX_GE_I32_nosdst_e64;
+  default:
+    errs() << "unhandled: " << Opcode << "\n";
+    llvm_unreachable("unhandled vcmp opcode");
+  }
+}
+
+bool needsPredication(const SIInstrInfo *TII, const MachineInstr &I) {
+  return TII->isVALU(I) || TII->isVMEM(I);
+}
+
+struct ExecPredicate : ifcvt::PredicationStrategy {
+  const SIInstrInfo *TII;
+  const SIRegisterInfo *RegInfo;
+
+  MachineInstr *Cmp = nullptr;
+
+  ExecPredicate(const SIInstrInfo *TII)
+      : TII(TII), RegInfo(&TII->getRegisterInfo()) {}
+
+  bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB,
+                    MachineBasicBlock *FBB, MachineBasicBlock *Tail,
+                    ArrayRef<MachineOperand> Cond) override {
+
+    // check that the cmp is just before the branch and that it is promotable to
+    // v_cmpx
+    const unsigned SupportedBranchOpc[]{
+        AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ,
+        AMDGPU::S_CBRANCH_VCCZ};
+
+    MachineInstr &CBranch = *Head->getFirstInstrTerminator();
+    if (!llvm::is_contained(SupportedBranchOpc, CBranch.getOpcode()))
+      return false;
+
+    auto CmpInstr = std::next(CBranch.getReverseIterator());
+    if (CmpInstr == Head->instr_rend())
+      return false;
+
+    Register SCCorVCC = Cond[1].getReg();
+    bool ModifiesConditionReg = CmpInstr->modifiesRegister(SCCorVCC, RegInfo);
+    if (!ModifiesConditionReg)
+      return false;
+
+    Cmp = &*CmpInstr;
+
+    unsigned CmpOpc = Cmp->getOpcode();
+    if (TII->isSALU(*Cmp))
+      CmpOpc = TII->getVALUOp(*Cmp);
+    if (AMDGPU::getVCMPXOpFromVCMP(CmpOpc) == -1) {
+      errs() << *Cmp << "\n";
+      return false;
+    }
+
+    auto NeedsPredication = [&](const MachineInstr &I) {
+      return needsPredication(TII, I);
+    };
+    auto BlockNeedsPredication = [&](const MachineBasicBlock *MBB) {
+      if (MBB == Tail)
+        return false;
+      auto Insts = llvm::make_range(MBB->begin(), MBB->getFirstTerminator());
+      return llvm::any_of(Insts, NeedsPredication);
+    };
+
+    MachineBasicBlock *Blocks[] = {TBB, FBB};
+
+    if (llvm::none_of(Blocks, BlockNeedsPredication))
+      return false;
+
+    return true;
+  }
+
+  bool canPredicate(const MachineInstr &I) override {
+
+    // TODO: relax this condition, if exec is masked, check that it goes back to
+    // normal
+    // TODO: what about scc or vcc ? Are they taken into acount in the MBB
+    // live-ins ?
+    MCRegister Exec = RegInfo->getExec();
+    bool ModifiesExec = I.modifiesRegister(Exec, RegInfo);
+    if (ModifiesExec)
+      return false;
+
+    if (needsPredication(TII, I))
+      return true;
+
+    bool DontMoveAcrossStore = true;
+    bool IsSpeculatable = I.isDereferenceableInvariantLoad() ||
+                          I.isSafeToMove(DontMoveAcrossStore);
+    if (IsSpeculatable)
+      return true;
+
+    return false;
+  }
+
+  bool predicateBlock(MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond,
+                      bool Reverse) override {
+    // save exec
+    MachineFunction &MF = *MBB->getParent();
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+    Register ExecBackup = MFI->getSGPRForEXECCopy();
+
+    const DebugLoc &CmpLoc = Cmp->getDebugLoc();
+
+    auto FirstInstruction = MBB->begin();
+    const bool IsSCCLive =
+        false; // asume not since the live-ins are supposed to be empty
+    TII->insertScratchExecCopy(MF, *MBB, FirstInstruction, CmpLoc, ExecBackup,
+                               IsSCCLive);
+
+    // mask exec
+    unsigned CmpOpc = Cmp->getOpcode();
+    if (TII->isSALU(*Cmp))
+      CmpOpc = TII->getVALUOp(*Cmp);
+
+    CmpOpc = AMDGPU::getVCMPXOpFromVCMP(CmpOpc);
+    if (Reverse)
+      CmpOpc = getReversedVCMPXOpcode(CmpOpc);
+
+    // TODO: handle this properly. The second block may kill those registers.
+    Cmp->getOperand(0).setIsKill(false);
+    Cmp->getOperand(1).setIsKill(false);
+
+    auto VCmpX = BuildMI(*MBB, FirstInstruction, CmpLoc, TII->get(CmpOpc));
+    VCmpX->addOperand(Cmp->getOperand(0));
+    VCmpX->addOperand(Cmp->getOperand(1));
+
+    // restore exec
+    TII->restoreExec(MF, *MBB, MBB->end(), DebugLoc(), ExecBackup);
+
+    return true;
+  }
+
+  ~ExecPredicate() override = default;
+};
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
+                   ArrayRef<MachineBasicBlock *> Removed) {
+  // convertIf can remove TBB, FBB, and Tail can be merged into Head.
+  // TBB and FBB should not dominate any blocks.
+  // Tail children should be transferred to Head.
+  MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
+  for (auto *B : Removed) {
+    MachineDomTreeNode *Node = DomTree->getNode(B);
+    assert(Node != HeadNode && "Cannot erase the head node");
+    while (Node->getNumChildren()) {
+      assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
+      DomTree->changeImmediateDominator(Node->back(), HeadNode);
+    }
+    DomTree->eraseNode(B);
+  }
+}
+
+/// Update LoopInfo after if-conversion.
+void updateLoops(MachineLoopInfo *Loops,
+                 ArrayRef<MachineBasicBlock *> Removed) {
+  // If-conversion doesn't change loop structure, and it doesn't mess with back
+  // edges, so updating LoopInfo is simply removing the dead blocks.
+  for (auto *B : Removed)
+    Loops->removeBlock(B);
+}
+
+bool AMDGPUIfConverter::shouldConvertIf() {
+  // TODO: cost model
+  return true;
+}
+
+bool AMDGPUIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
+  ExecPredicate Predicate{TII};
+  bool Changed = false;
+  while (IfConv.canConvertIf(MBB, Predicate) && shouldConvertIf()) {
+    // If-convert MBB and update analyses.
+    SmallVector<MachineBasicBlock *, 4> RemoveBlocks;
+    IfConv.convertIf(RemoveBlocks, Predicate);
+    Changed = true;
+    updateDomTree(DomTree, IfConv, RemoveBlocks);
+    for (MachineBasicBlock *MBB : RemoveBlocks)
+      MBB->eraseFromParent();
+    updateLoops(Loops, RemoveBlocks);
+  }
+  return Changed;
+}
+
+} // namespace
+
+char &llvm::AMDGPUIfConverterID = AMDGPUIfConverter::ID;
+INITIALIZE_PASS_BEGIN(AMDGPUIfConverter, DEBUG_TYPE, PassName, false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUIfConverter, DEBUG_TYPE, PassName, false, false)
----------------
arsenm wrote:

Missing line at end of file 

https://github.com/llvm/llvm-project/pull/106415


More information about the llvm-commits mailing list