[llvm] [AMDGPU][SIInsertWaitcnts][NFC] Clean up loop (PR #179572)

Thu Feb 12 08:39:24 PST 2026

================
@@ -3009,31 +3009,30 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
   MachineInstr *OldWaitcntInstr = nullptr;
   AtomicRMWState RMWState = AtomicRMWState::NotInBlock;
 
+  // NOTE: we may erase Inst and/or append instrs after Inst while iterating
   for (MachineBasicBlock::instr_iterator Iter = Block.instr_begin(),
                                          E = Block.instr_end();
        Iter != E;) {
-    MachineInstr &Inst = *Iter;
-    if (Inst.isMetaInstruction()) {
-      ++Iter;
+    // Early increment Iter because we may erase Inst while iterating.
+    MachineInstr &Inst = *Iter++;
+    if (Inst.isMetaInstruction())
       continue;
-    }
     // Get the atomic RMW block state for current instruction.
     RMWState = getAtomicRMWState(Inst, RMWState);
 
     // Track pre-existing waitcnts that were added in earlier iterations or by
     // the memory legalizer.
     if (isWaitInstr(Inst) ||
         (IsExpertMode && Inst.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR)) {
-      ++Iter;
       bool IsSoftXcnt = isSoftXcnt(Inst);
       // The Memory Legalizer conservatively inserts a soft xcnt before each
       // atomic RMW operation. However, for sequences of back-to-back atomic
       // RMWs, only the first s_wait_xcnt insertion is necessary. Optimize away
       // the redundant soft xcnts when we're inside an atomic RMW block.
-      if (Iter != E && IsSoftXcnt) {
+      if (&Inst != &Block.back() && IsSoftXcnt) {
         // Check if the next instruction can potentially change the atomic RMW
         // state.
-        RMWState = getAtomicRMWState(*Iter, RMWState);
+        RMWState = getAtomicRMWState(*Inst.getNextNode(), RMWState);
----------------
vporpo wrote:

Yes, now that we are incrementing the iterator early, `Iter != E` is better.
Done.

https://github.com/llvm/llvm-project/pull/179572