[llvm] [AMDGPU] Teach SILateBranchLowering pass to preserve MachineLoopInfo (PR #178276)

Dark Steve via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 29 08:02:37 PST 2026


https://github.com/PrasoonMishra updated https://github.com/llvm/llvm-project/pull/178276

>From 7022be9e225d9bf804b51fe4ea9e139dfc91c2a9 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Tue, 27 Jan 2026 18:48:21 +0000
Subject: [PATCH 1/5] [AMDGPU] Teach SILateBranchLowering to preserve
 MachineLoopInfo

When splitting blocks inside loops due to SI_EARLY_TERMINATE_SCC0 handling,
add the split block to the loop to keep MachineLoopInfo valid.
---
 .../Target/AMDGPU/SILateBranchLowering.cpp    | 30 +++++++++++----
 ...ate-branch-lowering-preserve-loop-info.mir | 37 +++++++++++++++++++
 2 files changed, 59 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir

diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index d6f175e67ea40..2b955f0a6ad4c 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -17,6 +17,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/InitializePasses.h"
 
@@ -32,6 +33,7 @@ class SILateBranchLowering {
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
   MachineDominatorTree *MDT;
+  MachineLoopInfo *MLI;
   const AMDGPU::LaneMaskConstants &LMC;
 
   void expandChainCall(MachineInstr &MI, const GCNSubtarget &ST,
@@ -39,9 +41,10 @@ class SILateBranchLowering {
   void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
 
 public:
-  SILateBranchLowering(const GCNSubtarget &ST, MachineDominatorTree *MDT)
+  SILateBranchLowering(const GCNSubtarget &ST, MachineDominatorTree *MDT,
+                       MachineLoopInfo *MLI)
       : ST(ST), TII(ST.getInstrInfo()), TRI(&TII->getRegisterInfo()), MDT(MDT),
-        LMC(AMDGPU::LaneMaskConstants::get(ST)) {}
+        MLI(MLI), LMC(AMDGPU::LaneMaskConstants::get(ST)) {}
 
   bool run(MachineFunction &MF);
 };
@@ -54,7 +57,8 @@ class SILateBranchLoweringLegacy : public MachineFunctionPass {
   bool runOnMachineFunction(MachineFunction &MF) override {
     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
-    return SILateBranchLowering(ST, MDT).run(MF);
+    auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+    return SILateBranchLowering(ST, MDT, MLI).run(MF);
   }
 
   StringRef getPassName() const override {
@@ -64,6 +68,8 @@ class SILateBranchLoweringLegacy : public MachineFunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineDominatorTreeWrapperPass>();
     AU.addPreserved<MachineDominatorTreeWrapperPass>();
+    AU.addRequired<MachineLoopInfoWrapperPass>();
+    AU.addPreserved<MachineLoopInfoWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };
@@ -75,6 +81,7 @@ char SILateBranchLoweringLegacy::ID = 0;
 INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE,
                       "SI insert s_cbranch_execz instructions", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE,
                     "SI insert s_cbranch_execz instructions", false, false)
 
@@ -117,7 +124,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
 }
 
 static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
-                       MachineDominatorTree *MDT) {
+                       MachineDominatorTree *MDT, MachineLoopInfo *MLI) {
   MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
 
   // Update dominator tree
@@ -129,6 +136,10 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
   }
   DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
   MDT->applyUpdates(DTUpdates);
+
+  // Update loop info
+  if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
+    Loop->addBasicBlockToLoop(SplitBB, *MLI);
 }
 
 static void copyOpWithoutRegFlags(MachineInstrBuilder &MIB,
@@ -199,7 +210,7 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI,
   auto Next = std::next(MI.getIterator());
 
   if (Next != MBB.end() && !Next->isTerminator())
-    splitBlock(MBB, *BranchMI, MDT);
+    splitBlock(MBB, *BranchMI, MDT, MLI);
 
   MBB.addSuccessor(EarlyExitBlock);
   MDT->insertEdge(&MBB, EarlyExitBlock);
@@ -210,11 +221,14 @@ llvm::SILateBranchLoweringPass::run(MachineFunction &MF,
                                     MachineFunctionAnalysisManager &MFAM) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
-  if (!SILateBranchLowering(ST, MDT).run(MF))
+  auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
+  if (!SILateBranchLowering(ST, MDT, MLI).run(MF))
     return PreservedAnalyses::all();
 
-  return getMachineFunctionPassPreservedAnalyses()
-      .preserve<MachineDominatorTreeAnalysis>();
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserve<MachineDominatorTreeAnalysis>();
+  PA.preserve<MachineLoopAnalysis>();
+  return PA;
 }
 
 bool SILateBranchLowering::run(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
new file mode 100644
index 0000000000000..99155e35ddcfe
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
@@ -0,0 +1,37 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="si-late-branch-lowering,print<machine-loops>" -filetype=null %s 2>&1 | FileCheck %s
+
+# Test that MachineLoopInfo is preserved when splitting a block inside a loop
+# due to early termination handling.
+
+# CHECK: Machine loop info for machine function 'early_term_in_loop':
+# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
+
+--- |
+  define amdgpu_ps void @early_term_in_loop() {
+    ret void
+  }
+...
+
+---
+name: early_term_in_loop
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+
+  ; Loop header contains SI_EARLY_TERMINATE_SCC0 followed by more instructions.
+  ; This triggers block splitting. Both bb.1 and bb.4 must remain in the loop.
+  bb.1:
+    liveins: $vgpr0
+    successors: %bb.1, %bb.2
+    S_CMP_LG_U32 0, 1, implicit-def $scc
+    SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+
+  bb.2:
+    liveins: $vgpr0, $vgpr1
+    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+    S_ENDPGM 0
+...

>From 1a8512728fa29f53964173b8a1e3e2bdc0dde454 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Tue, 27 Jan 2026 19:25:35 +0000
Subject: [PATCH 2/5] Addressed reviewer comment: use MachineLoopInfo only if available instead of requiring it.

---
 llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index 2b955f0a6ad4c..83cf457dfac13 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -57,7 +57,8 @@ class SILateBranchLoweringLegacy : public MachineFunctionPass {
   bool runOnMachineFunction(MachineFunction &MF) override {
     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
     auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
-    auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+    auto *MLIWP = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
+    MachineLoopInfo *MLI = MLIWP ? &MLIWP->getLI() : nullptr;
     return SILateBranchLowering(ST, MDT, MLI).run(MF);
   }
 
@@ -68,7 +69,6 @@ class SILateBranchLoweringLegacy : public MachineFunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineDominatorTreeWrapperPass>();
     AU.addPreserved<MachineDominatorTreeWrapperPass>();
-    AU.addRequired<MachineLoopInfoWrapperPass>();
     AU.addPreserved<MachineLoopInfoWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -81,7 +81,6 @@ char SILateBranchLoweringLegacy::ID = 0;
 INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE,
                       "SI insert s_cbranch_execz instructions", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE,
                     "SI insert s_cbranch_execz instructions", false, false)
 
@@ -137,9 +136,11 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
   DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
   MDT->applyUpdates(DTUpdates);
 
-  // Update loop info
-  if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
-    Loop->addBasicBlockToLoop(SplitBB, *MLI);
+  // Update loop info if available
+  if (MLI) {
+    if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
+      Loop->addBasicBlockToLoop(SplitBB, *MLI);
+  }
 }
 
 static void copyOpWithoutRegFlags(MachineInstrBuilder &MIB,
@@ -221,7 +222,7 @@ llvm::SILateBranchLoweringPass::run(MachineFunction &MF,
                                     MachineFunctionAnalysisManager &MFAM) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
-  auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
+  auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(MF);
   if (!SILateBranchLowering(ST, MDT, MLI).run(MF))
     return PreservedAnalyses::all();
 

>From 70e19dc4d5e2500c25b8df168a5e7d6d3332cb2a Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Wed, 28 Jan 2026 02:55:54 +0000
Subject: [PATCH 3/5] Updated the test to verify that MachineLoopInfo is preserved and not recomputed.

---
 .../AMDGPU/si-late-branch-lowering-preserve-loop-info.mir | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
index 99155e35ddcfe..052b0b8f68b71 100644
--- a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
@@ -1,10 +1,14 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="si-late-branch-lowering,print<machine-loops>" -filetype=null %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<machine-loops>,si-late-branch-lowering,print<machine-loops>" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s
 
 # Test that MachineLoopInfo is preserved when splitting a block inside a loop
 # due to early termination handling.
 
+# CHECK: Running analysis: MachineLoopAnalysis on early_term_in_loop
+# CHECK: Running pass: SILateBranchLoweringPass on early_term_in_loop
+# CHECK: Running pass: MachineLoopPrinterPass on early_term_in_loop
 # CHECK: Machine loop info for machine function 'early_term_in_loop':
-# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
+# CHECK-NOT: Running analysis: MachineLoopAnalysis on early_term_in_loop
+# CHECK: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
 
 --- |
   define amdgpu_ps void @early_term_in_loop() {

>From 3efb79620c71bc4f8dd95d3f2bbd4c32c07ac341 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Thu, 29 Jan 2026 10:24:19 +0000
Subject: [PATCH 4/5] Use CHECK-NEXT directives in the test.

---
 .../si-late-branch-lowering-preserve-loop-info.mir       | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
index 052b0b8f68b71..f931a2d04a1fe 100644
--- a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
@@ -4,11 +4,12 @@
 # due to early termination handling.
 
 # CHECK: Running analysis: MachineLoopAnalysis on early_term_in_loop
-# CHECK: Running pass: SILateBranchLoweringPass on early_term_in_loop
-# CHECK: Running pass: MachineLoopPrinterPass on early_term_in_loop
-# CHECK: Machine loop info for machine function 'early_term_in_loop':
+# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on early_term_in_loop
+# CHECK-NEXT: Running pass: SILateBranchLoweringPass on early_term_in_loop
+# CHECK-NEXT: Running pass: MachineLoopPrinterPass on early_term_in_loop
+# CHECK-NEXT: Machine loop info for machine function 'early_term_in_loop':
 # CHECK-NOT: Running analysis: MachineLoopAnalysis on early_term_in_loop
-# CHECK: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
+# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
 
 --- |
   define amdgpu_ps void @early_term_in_loop() {

>From 8f1084cf1047940bc3f9bef27f747b43bda5041b Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Thu, 29 Jan 2026 14:06:16 +0000
Subject: [PATCH 5/5] Remove IR section from MIR test.

---
 .../AMDGPU/si-late-branch-lowering-preserve-loop-info.mir   | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
index f931a2d04a1fe..b5c9319f809c0 100644
--- a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir
@@ -11,12 +11,6 @@
 # CHECK-NOT: Running analysis: MachineLoopAnalysis on early_term_in_loop
 # CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
 
---- |
-  define amdgpu_ps void @early_term_in_loop() {
-    ret void
-  }
-...
-
 ---
 name: early_term_in_loop
 tracksRegLiveness: true



More information about the llvm-commits mailing list