[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

Nicolai Hähnle via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Mar 7 10:46:11 PST 2025


================
@@ -228,6 +229,66 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of a use outside multiple nested cycles, or of multiple uses, we
+  // only need to merge the lane mask across the largest relevant cycle.
+  SmallDenseMap<Register, std::pair<const MachineCycle *, Register>> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+    if (MRI->getType(Reg) != LLT::scalar(1))
+      continue;
+
+    const MachineCycle *CachedLRC = LRCCache.lookup(Reg).first;
+    if (CachedLRC) {
+      LRC = CachedLRC->contains(LRC) ? CachedLRC : LRC;
+      assert(LRC->contains(CachedLRC));
+    }
+
+    LRCCache[Reg] = {LRC, {}};
+  }
+
+  for (auto LRCIter : LRCCache) {
+    Register Reg = LRCIter.first;
+    const MachineCycle *Cycle = LRCIter.second.first;
+
+    if (MRI->getType(Reg) != LLT::scalar(1))
+      continue;
+
+    Register MergedMask = MRI->createVirtualRegister(BoolS1);
+    SSAUpdater.Initialize(MergedMask);
+
+    MachineBasicBlock *MBB = MRI->getVRegDef(Reg)->getParent();
+    SSAUpdater.AddAvailableValue(MBB, MergedMask);
+
+    for (auto Entry : Cycle->getEntries()) {
+      for (MachineBasicBlock *Pred : Entry->predecessors()) {
+        if (!Cycle->contains(Pred)) {
+          B.setInsertPt(*Pred, Pred->getFirstTerminator());
+          auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
+          SSAUpdater.AddAvailableValue(Pred, ImplDef.getReg(0));
+        }
+      }
+    }
+
+    buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
+                        SSAUpdater.GetValueInMiddleOfBlock(MBB), Reg);
+
+    LRCCache[Reg].second = MergedMask;
+  }
+
+  for (auto [Reg, UseInst, Cycle] : MUI->getTemporalDivergenceList()) {
+    if (MRI->getType(Reg) != LLT::scalar(1))
+      continue;
+
+    replaceUsesOfRegInInstWith(Reg, UseInst, LRCCache[Reg].second);
----------------
nhaehnle wrote:

This can use `.lookup` instead of `operator[]`, for consistency with the `LRCCache.lookup(Reg)` call above.

https://github.com/llvm/llvm-project/pull/124299


More information about the llvm-branch-commits mailing list