[llvm] [AMDGPU] Account for implicit XCNT insertion (PR #160812)

Fri Oct 3 05:22:38 PDT 2025

================
@@ -1003,18 +994,27 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
         // score.
         for (MachineOperand &DefMO : Inst.all_defs()) {
           if (TRI->isVGPR(*MRI, DefMO.getReg())) {
-            setScoreByOperand(&Inst, DefMO, EXP_CNT, CurrScore);
+            setScoreByOperand(&Inst, TRI, MRI, DefMO, EXP_CNT, CurrScore);
           }
         }
       }
       for (const MachineOperand &Op : Inst.all_uses()) {
         if (TRI->isVectorRegister(*MRI, Op.getReg()))
-          setScoreByOperand(&Inst, Op, EXP_CNT, CurrScore);
+          setScoreByOperand(&Inst, TRI, MRI, Op, EXP_CNT, CurrScore);
       }
     }
   } else if (T == X_CNT) {
+    WaitEventType OtherEvent = E == SMEM_GROUP ? VMEM_GROUP : SMEM_GROUP;
+    if (PendingEvents & (1 << OtherEvent)) {
+      // Hardware inserts an implicit xcnt between interleaved
+      // SMEM and VMEM operations. So there will never be
+      // outstanding address translations for both SMEM and
+      // VMEM at the same time.
+      setScoreLB(T, CurrScore - 1);
----------------
jayfoad wrote:

This is a bit unorthodox. You would normally write `setScoreLB(T, getScoreUB(T))` like in `applyWaitcnt`. In fact maybe you can use `applyWaitcnt(T, 0)` here, since it can update PendingEvents for you too?

https://github.com/llvm/llvm-project/pull/160812