[llvm] AMDGPU: Fix temporal divergence introduced by machine-sink and performance regression introduced by D155343 (PR #67456)

Nicolai Hähnle via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 3 11:38:12 PDT 2023


================
@@ -171,6 +171,48 @@ bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
          isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
 }
 
+bool SIInstrInfo::isSafeToSink(MachineInstr &MI,
+                               MachineBasicBlock *SuccToSinkTo,
+                               MachineCycleInfo *CI) const {
+  CI->clear();
+  CI->compute(*MI.getMF());
+  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+
+  // Check if sinking of MI would create temporal divergent use.
+  for (auto Op : MI.uses()) {
+    if (Op.isReg() && Op.getReg().isVirtual() &&
+        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {
+      MachineInstr *SgprDef = MRI.getVRegDef(Op.getReg());
+
+      // SgprDef defined inside cycle
+      MachineCycle *FromCycle = CI->getCycle(SgprDef->getParent());
+      if (FromCycle == nullptr)
+        return true;
+
+      // After structurize-cfg, there should be exactly one cycle exit.
+      SmallVector<MachineBasicBlock *, 1> ExitBlocks;
+      FromCycle->getExitBlocks(ExitBlocks);
+      assert(ExitBlocks.size() == 1);
+      assert(ExitBlocks[0]->getSinglePredecessor());
+
+      // Cycle has divergent exit condition.
+      if (!hasDivergentBranch(ExitBlocks[0]->getSinglePredecessor()))
+        return true;
+
+      // SuccToSinkTo is not in the cycle.
+      if (FromCycle != CI->getCycle(SuccToSinkTo)) {
+
+        // Allow sinking if MI edits lane mask (divergent i1 in sgpr).
+        if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
+          return true;
----------------
nhaehnle wrote:

The opcode check (`MI.getOpcode() == AMDGPU::SI_IF_BREAK`) does not depend on which operand is being examined, so it should be hoisted out of the operand loop and done once up front.

https://github.com/llvm/llvm-project/pull/67456


More information about the llvm-commits mailing list