[llvm] [AMDGPU] Teach SIPreEmitPeephole pass to preserve MachineLoopInfo (PR #178868)
Dark Steve via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 30 03:16:42 PST 2026
https://github.com/PrasoonMishra created https://github.com/llvm/llvm-project/pull/178868
SIPreEmitPeephole performs several optimizations, but only some of them modify the CFG.
Optimizations that do not modify the CFG:
- `optimizeSetGPR()` - removes redundant `S_SET_GPR_IDX_ON` instructions
- Unpacking of packed instructions (`V_PK_MUL_F32`, `V_PK_ADD_F32`, `V_PK_FMA_F32`) for MFMA co-issue - transforms packed instructions into unpacked equivalents
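- `optimizeVccBranch()` - only on the path where it merely removes a redundant `S_AND_B64` after a VOPC comparison instruction (the branch itself remains unchanged); its other paths rewrite or remove the branch and therefore do modify the CFG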
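This patch adds a `CFGModified` flag that is set only when a CFG-modifying transformation occurs (the branch-folding paths of `optimizeVccBranch()` and `removeExeczBranch()`), allowing MachineLoopInfo (MLI) to be preserved when the CFG is not modified.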
>From 4783861a22b836e1a63e433e0afdd3ae2203996b Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Fri, 30 Jan 2026 08:14:30 +0000
Subject: [PATCH 1/2] Preserve MLI info in SIPreEmitPeephole.
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 21 ++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 385127cb26eea..e2c7d89f9325f 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/BranchProbability.h"
@@ -37,8 +38,9 @@ class SIPreEmitPeephole {
private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
+ bool CFGModified = false;
- bool optimizeVccBranch(MachineInstr &MI) const;
+ bool optimizeVccBranch(MachineInstr &MI);
bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
bool getBlockDestinations(MachineBasicBlock &SrcMBB,
MachineBasicBlock *&TrueMBB,
@@ -79,6 +81,7 @@ class SIPreEmitPeephole {
public:
bool run(MachineFunction &MF);
+ bool isCFGModified() const { return CFGModified; }
};
class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
@@ -103,7 +106,7 @@ char SIPreEmitPeepholeLegacy::ID = 0;
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
-bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
+bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) {
// Match:
// sreg = -1 or 0
// vcc = S_AND_B64 exec, sreg or S_ANDN2_B64 exec, sreg
@@ -228,6 +231,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
if (SReg == ExecReg) {
+ CFGModified = true;
// EXEC is updated directly
if (IsVCCZ) {
MI.eraseFromParent();
@@ -235,6 +239,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
}
MI.setDesc(TII->get(AMDGPU::S_BRANCH));
} else if (IsVCCZ && MaskValue == 0) {
+ CFGModified = true;
// Will always branch
// Remove all successors shadowed by new unconditional branch
MachineBasicBlock *Parent = MI.getParent();
@@ -264,6 +269,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
MI.setDesc(TII->get(AMDGPU::S_BRANCH));
} else if (!IsVCCZ && MaskValue == 0) {
// Will never branch
+ CFGModified = true;
MachineOperand &Dst = MI.getOperand(0);
assert(Dst.isMBB() && "destination is not basic block");
MI.getParent()->removeSuccessor(Dst.getMBB());
@@ -449,6 +455,7 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
MI.eraseFromParent();
SrcMBB.removeSuccessor(TrueMBB);
+ CFGModified = true;
return true;
}
@@ -709,9 +716,14 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
auto *MDT = MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
auto *MPDT = MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
+ SIPreEmitPeephole Impl;
- if (SIPreEmitPeephole().run(MF))
- return getMachineFunctionPassPreservedAnalyses();
+ if (Impl.run(MF)) {
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ if (!Impl.isCFGModified())
+ PA.preserve<MachineLoopAnalysis>();
+ return PA;
+ }
if (MDT)
MDT->updateBlockNumbers();
@@ -724,6 +736,7 @@ bool SIPreEmitPeephole::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
+ CFGModified = false;
bool Changed = false;
MF.RenumberBlocks();
>From 49a9e9ab8a235213e904bf600a9b8177845688a5 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra <Prasoon.Mishra at amd.com>
Date: Fri, 30 Jan 2026 09:34:35 +0000
Subject: [PATCH 2/2] Add test.
---
...i-pre-emit-peephole-preserve-loop-info.mir | 27 +++++++++++++++++++
1 file changed, 27 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir
diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir
new file mode 100644
index 0000000000000..4347117f7dc94
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir
@@ -0,0 +1,27 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<machine-loops>,si-pre-emit-peephole,print<machine-loops>" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s
+
+# CHECK: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Running pass: SIPreEmitPeepholePass on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Invalidating analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Running pass: MachineLoopPrinterPass on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Machine loop info for machine function 'vcc_and_removal_preserves_mli':
+# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
+# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><latch><exiting>
+
+---
+name: vcc_and_removal_preserves_mli
+body: |
+ bb.0:
+ S_BRANCH %bb.1
+
+ ; S_AND gets removed
+ bb.1:
+ V_CMP_EQ_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec
+ $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list