[llvm] [AMDGPU] Teach SIPreEmitPeephole pass to preserve MachineLoopInfo (PR #178868)

Dark Steve via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 30 03:16:42 PST 2026


https://github.com/PrasoonMishra created https://github.com/llvm/llvm-project/pull/178868

SIPreEmitPeephole performs several optimizations, but only some of them modify the CFG.

Optimizations that do not modify the CFG:
- `optimizeSetGPR()` - removes redundant `S_SET_GPR_IDX_ON` instructions
- Unpacking of packed instructions (`V_PK_MUL_F32`, `V_PK_ADD_F32`, `V_PK_FMA_F32`) for MFMA co-issue - transforms packed instructions into unpacked equivalents
- `optimizeVccBranch()` - only in the case where it removes a redundant `S_AND_B64` after a VOPC comparison instruction and leaves the branch itself unchanged

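Added a `CFGModified` flag that is set only when a CFG-modifying transformation occurs (the branch rewrites and removals in `optimizeVccBranch()`, and `removeExeczBranch()`), allowing MachineLoopInfo (MLI) to be preserved when the CFG is not modified.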

>From 4783861a22b836e1a63e433e0afdd3ae2203996b Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Fri, 30 Jan 2026 08:14:30 +0000
Subject: [PATCH 1/2] Preserve MLI info in SIPreEmitPeephole.

---
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 21 ++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 385127cb26eea..e2c7d89f9325f 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/BranchProbability.h"
@@ -37,8 +38,9 @@ class SIPreEmitPeephole {
 private:
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
+  bool CFGModified = false;
 
-  bool optimizeVccBranch(MachineInstr &MI) const;
+  bool optimizeVccBranch(MachineInstr &MI);
   bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
   bool getBlockDestinations(MachineBasicBlock &SrcMBB,
                             MachineBasicBlock *&TrueMBB,
@@ -79,6 +81,7 @@ class SIPreEmitPeephole {
 
 public:
   bool run(MachineFunction &MF);
+  bool isCFGModified() const { return CFGModified; }
 };
 
 class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
@@ -103,7 +106,7 @@ char SIPreEmitPeepholeLegacy::ID = 0;
 
 char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
 
-bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
+bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) {
   // Match:
   // sreg = -1 or 0
   // vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg
@@ -228,6 +231,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
 
   bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
   if (SReg == ExecReg) {
+    CFGModified = true;
     // EXEC is updated directly
     if (IsVCCZ) {
       MI.eraseFromParent();
@@ -235,6 +239,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
     }
     MI.setDesc(TII->get(AMDGPU::S_BRANCH));
   } else if (IsVCCZ && MaskValue == 0) {
+    CFGModified = true;
     // Will always branch
     // Remove all successors shadowed by new unconditional branch
     MachineBasicBlock *Parent = MI.getParent();
@@ -264,6 +269,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
     MI.setDesc(TII->get(AMDGPU::S_BRANCH));
   } else if (!IsVCCZ && MaskValue == 0) {
     // Will never branch
+    CFGModified = true;
     MachineOperand &Dst = MI.getOperand(0);
     assert(Dst.isMBB() && "destination is not basic block");
     MI.getParent()->removeSuccessor(Dst.getMBB());
@@ -449,6 +455,7 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
   LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
   MI.eraseFromParent();
   SrcMBB.removeSuccessor(TrueMBB);
+  CFGModified = true;
 
   return true;
 }
@@ -709,9 +716,14 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
   auto *MDT = MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
   auto *MPDT = MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
+  SIPreEmitPeephole Impl;
 
-  if (SIPreEmitPeephole().run(MF))
-    return getMachineFunctionPassPreservedAnalyses();
+  if (Impl.run(MF)) {
+    auto PA = getMachineFunctionPassPreservedAnalyses();
+    if (!Impl.isCFGModified())
+      PA.preserve<MachineLoopAnalysis>();
+    return PA;
+  }
 
   if (MDT)
     MDT->updateBlockNumbers();
@@ -724,6 +736,7 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();
+  CFGModified = false;
   bool Changed = false;
 
   MF.RenumberBlocks();

>From 49a9e9ab8a235213e904bf600a9b8177845688a5 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Fri, 30 Jan 2026 09:34:35 +0000
Subject: [PATCH 2/2] Add test.

---
 ...i-pre-emit-peephole-preserve-loop-info.mir | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir

diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir
new file mode 100644
index 0000000000000..4347117f7dc94
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir
@@ -0,0 +1,27 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<machine-loops>,si-pre-emit-peephole,print<machine-loops>" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s
+
+# CHECK: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Running pass: SIPreEmitPeepholePass on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Invalidating analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Running pass: MachineLoopPrinterPass on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Machine loop info for machine function 'vcc_and_removal_preserves_mli':
+# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><latch><exiting>
+
+---
+name: vcc_and_removal_preserves_mli
+body: |
+  bb.0:
+    S_BRANCH %bb.1
+
+  ; S_AND gets removed
+  bb.1:
+    V_CMP_EQ_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...



More information about the llvm-commits mailing list