[llvm] Move SI Lower Control Flow Up (PR #159557)
Patrick Simmons via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 04:36:30 PDT 2025
https://github.com/linuxrocks123 created https://github.com/llvm/llvm-project/pull/159557
Still needs tests and testing. Also needs camel case. If the snake case bothers anyone, I'll fix it now.
>From 5497afab88cc7b02964a3a2ae3cd29523cb29d31 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 15 Sep 2025 08:35:00 -0500
Subject: [PATCH 1/7] In-progress work
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 144 ++++++++++++++++++
1 file changed, 144 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
new file mode 100644
index 0000000000000..fc96c4370879e
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -0,0 +1,144 @@
+#pragma once
+
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "SIInstrInfo.h"
+
+#include <cassert>
+#include <unordered_set>
+
+using namespace llvm;
+
+using std::unordered_set;
+using std::vector;
+
+static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MBB,
+ MachineBasicBlock &dest_MBB) {
+ auto& TII = *branching_MBB.getParent()->getSubtarget<GCNSubtarget>().getInstrInfo();
+ for (MachineInstr &branch_MI : reverse(branching_MBB.instrs()))
+ if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &dest_MBB)
+ return branch_MI;
+
+ llvm_unreachable("Don't call this if there's no branch to the destination.");
+}
+
+static inline void move_ins_before_phis(MachineInstr &MI,
+ MachineBasicBlock &MBB) {
+ MachineFunction& MF = *MBB.getParent();
+ auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ if (MBB.size() == 0 || MBB.front().getOpcode() != AMDGPU::PHI /*do we need to check for debug instructions here?*/)
+ MBB.insert(MBB.begin(), &MI);
+ else
+ for (auto* pred_MBB : MBB.predecessors())
+ {
+ MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
+ if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &MBB) {
+ MachineInstr* cloned_MI = MF.CloneMachineInstr(&MI);
+ pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
+ cloned_MI->bundleWithPred();
+ }
+ }
+
+ MI.eraseFromParent();
+}
+
+static inline MachineBasicBlock::instr_iterator
+get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) {
+ MachineFunction& MF = *pred_MBB.getParent();
+ auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ for (MachineInstr &branch_MI : reverse(pred_MBB.instrs()))
+ if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
+ return std::next(branch_MI.getIterator());
+ else
+ assert(branch_MI.isBranch() && "Shouldn't have fall-throughs here.");
+
+ llvm_unreachable("There should always be a branch to succ_MBB.");
+}
+
+static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
+ auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ struct CFG_Rewrite_Entry {
+ unordered_set<MachineBasicBlock *> pred_MBBs;
+ MachineBasicBlock *succ_MBB;
+ vector<MachineInstr*> body;
+ };
+
+ auto epilogs_are_identical = [](const vector<MachineInstr *> left,
+ const vector<MachineInstr *> right) {
+ if (left.size() != right.size())
+ return false;
+
+ for (unsigned i = 0; i < left.size(); i++)
+ if (!left[i]->isIdenticalTo(*right[i]))
+ return false;
+ return true;
+ };
+
+ auto move_body = [](vector<MachineInstr *> &body,
+ MachineBasicBlock &dest_MBB) {
+ for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
+ MachineInstr &body_ins = **rev_it;
+ body_ins.removeFromBundle();
+ dest_MBB.insert(dest_MBB.begin(), &body_ins);
+ }
+ };
+
+ vector<CFG_Rewrite_Entry> cfg_rewrite_entries;
+ for (MachineBasicBlock &MBB : MF) {
+ CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
+ for (MachineBasicBlock *pred_MBB : MBB.predecessors()) {
+ MachineBasicBlock::instr_iterator ep_it =
+ get_epilog_for_successor(*pred_MBB, MBB)->getIterator();
+
+ vector<MachineInstr *> epilog;
+ while (!ep_it.isEnd())
+ epilog.push_back(&*ep_it++);
+
+ if (!epilogs_are_identical(to_insert.body, epilog)) {
+ if (to_insert.pred_MBBs.size() && to_insert.body.size()) {
+ // Potentially, we need to insert a new entry. But first see if we
+ // can find an existing entry with the same epilog.
+ bool existing_entry_found = false;
+ for (auto rev_it = cfg_rewrite_entries.rbegin();
+ rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB;
+ rev_it++)
+ if (epilogs_are_identical(rev_it->body, epilog)) {
+ rev_it->pred_MBBs.insert(pred_MBB);
+ existing_entry_found = true;
+ break;
+ }
+
+ if(!existing_entry_found)
+ cfg_rewrite_entries.push_back(to_insert);
+ }
+ to_insert.pred_MBBs.clear();
+ to_insert.body = epilog;
+ }
+
+ to_insert.pred_MBBs.insert(pred_MBB);
+ }
+
+ // Handle the last potential rewrite entry. Lower instead of journaling a
+ // rewrite entry if all predecessor MBBs are in this single entry.
+ if (to_insert.pred_MBBs.size() == MBB.pred_size())
+ // Lower
+ move_body(to_insert.body,MBB);
+ else if (to_insert.body.size())
+ cfg_rewrite_entries.push_back(to_insert);
+
+ // Perform the journaled rewrites.
+ for (const auto &entry : cfg_rewrite_entries) {
+ MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
+ for(MachineBasicBlock* pred_MBB : entry.pred_MBBs)
+ {
+
+ }
+ }
+}
>From 38909c5c056f4f98485a14c67ccfc201b1b25515 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 16 Sep 2025 05:01:32 -0500
Subject: [PATCH 2/7] Finish initial implementation ... I hope.
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 41 ++++++++++++++-----
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 3 ++
2 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index fc96c4370879e..81872f4fe8ab4 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -26,14 +26,17 @@ static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MB
llvm_unreachable("Don't call this if there's no branch to the destination.");
}
-static inline void move_ins_before_phis(MachineInstr &MI,
- MachineBasicBlock &MBB) {
+static inline void move_ins_before_phis(MachineInstr &MI) {
+ MachineBasicBlock& MBB = *MI.getParent();
MachineFunction& MF = *MBB.getParent();
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- if (MBB.size() == 0 || MBB.front().getOpcode() != AMDGPU::PHI /*do we need to check for debug instructions here?*/)
+ if (MBB.size() == 0 ||
+ MBB.front().getOpcode() !=
+ AMDGPU::PHI /*do we need to check for debug instructions here?*/) {
+ MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
- else
+ } else {
for (auto* pred_MBB : MBB.predecessors())
{
MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
@@ -43,8 +46,8 @@ static inline void move_ins_before_phis(MachineInstr &MI,
cloned_MI->bundleWithPred();
}
}
-
- MI.eraseFromParent();
+ MI.eraseFromParent();
+ }
}
static inline MachineBasicBlock::instr_iterator
@@ -132,13 +135,31 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
move_body(to_insert.body,MBB);
else if (to_insert.body.size())
cfg_rewrite_entries.push_back(to_insert);
+ }
// Perform the journaled rewrites.
- for (const auto &entry : cfg_rewrite_entries) {
+ for (auto &entry : cfg_rewrite_entries) {
MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
- for(MachineBasicBlock* pred_MBB : entry.pred_MBBs)
- {
-
+
+ // Deal with mezzanine to successor succession.
+ BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB);
+ mezzanine_MBB->addSuccessor(entry.succ_MBB);
+
+ // Move instructions to mezzanine block.
+ move_body(entry.body, *mezzanine_MBB);
+
+ for (MachineBasicBlock *pred_MBB : entry.pred_MBBs) {
+ //Deal with predecessor to mezzanine succession.
+ MachineInstr &branch_ins =
+ get_branch_with_dest(*pred_MBB, *entry.succ_MBB);
+ assert(branch_ins.getOperand(0).isMBB() && "Branch instruction isn't.");
+ branch_ins.getOperand(0).setMBB(mezzanine_MBB);
+ pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB);
+
+ // Delete instructions that were lowered from epilog
+ auto epilog_it = get_epilog_for_successor(*pred_MBB, *entry.succ_MBB);
+ while (!epilog_it.isEnd())
+ epilog_it++->eraseFromBundle();
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 115a020f44098..ee4d6709eeafc 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -48,6 +48,7 @@
/// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits
//===----------------------------------------------------------------------===//
+#include "SICustomBranchBundles.h"
#include "SILowerControlFlow.h"
#include "AMDGPU.h"
#include "AMDGPULaneMaskUtils.h"
@@ -323,6 +324,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
+ move_ins_before_phis(*OrSaveExec);
+
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
MachineBasicBlock::iterator ElsePt(MI);
>From 15da6ccc03e2dbf6889f26fe71c57405a7a7c404 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 16 Sep 2025 05:39:36 -0500
Subject: [PATCH 3/7] Now to write the pass to get rid of the bundles
---
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 +++++---
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 8 ++++++++
llvm/lib/Target/AMDGPU/SILowerControlFlow.h | 8 ++++++++
3 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 92a587b5771b6..fa454730364c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1563,7 +1563,7 @@ void GCNPassConfig::addFastRegAlloc() {
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
+ //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
@@ -1586,10 +1586,12 @@ void GCNPassConfig::addOptimizedRegAlloc() {
if (OptVGPRLiveRange)
insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID);
+ insertPass(&SIOptimizeVGPRLiveRangeLegacyID, &SILowerControlFlowLegacyID);
+
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
+ //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
if (EnableRewritePartialRegUses)
insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
@@ -2256,7 +2258,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass<PHIEliminationPass>(SILowerControlFlowPass());
+ //insertPass<PHIEliminationPass>(SILowerControlFlowPass());
if (EnableRewritePartialRegUses)
insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass());
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index ee4d6709eeafc..ea3d2ed61a808 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -153,6 +153,14 @@ class SILowerControlFlowLegacy : public MachineFunctionPass {
return "SI Lower control flow pseudo instructions";
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setIsSSA();
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().setNoPHIs();
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
// Should preserve the same set that TwoAddressInstructions does.
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h
index 23803c679c246..0f4df79952999 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h
@@ -16,6 +16,14 @@ class SILowerControlFlowPass : public PassInfoMixin<SILowerControlFlowPass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
+
+ MachineFunctionProperties getRequiredProperties() const {
+ return MachineFunctionProperties().setIsSSA();
+ }
+
+ MachineFunctionProperties getClearedProperties() const {
+ return MachineFunctionProperties().setNoPHIs();
+ }
};
} // namespace llvm
>From c7439fec4a567543a4132a85c42150ba6113bcf3 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 16 Sep 2025 06:07:26 -0500
Subject: [PATCH 4/7] For my next act, I'll actually make this SSA form by
creating different VREGs in the hoisted instructions and merging them with
PHI.
---
llvm/lib/Target/AMDGPU/SICustomBranchBundles.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 81872f4fe8ab4..51d1e84a47c4c 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -31,9 +31,14 @@ static inline void move_ins_before_phis(MachineInstr &MI) {
MachineFunction& MF = *MBB.getParent();
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- if (MBB.size() == 0 ||
- MBB.front().getOpcode() !=
- AMDGPU::PHI /*do we need to check for debug instructions here?*/) {
+ bool phi_seen = false;
+ for (MachineInstr &maybe_phi : MBB)
+ if (maybe_phi.getOpcode() == AMDGPU::PHI) {
+ phi_seen = true;
+ break;
+ }
+
+ if (!phi_seen) {
MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
} else {
>From deb796553adbb6d03e1a0cae50f299bfd1e417dc Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 17 Sep 2025 06:36:07 -0500
Subject: [PATCH 5/7] More work
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 136 +++++++++++++-----
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 6 +
2 files changed, 109 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 51d1e84a47c4c..18888975bdba0 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -4,6 +4,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/ErrorHandling.h"
#include "SIInstrInfo.h"
@@ -29,11 +30,13 @@ static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MB
static inline void move_ins_before_phis(MachineInstr &MI) {
MachineBasicBlock& MBB = *MI.getParent();
MachineFunction& MF = *MBB.getParent();
- auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ auto& MRI = MF.getRegInfo();
bool phi_seen = false;
- for (MachineInstr &maybe_phi : MBB)
- if (maybe_phi.getOpcode() == AMDGPU::PHI) {
+ MachineBasicBlock::iterator first_phi;
+ for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++)
+ if (first_phi->getOpcode() == AMDGPU::PHI) {
phi_seen = true;
break;
}
@@ -42,20 +45,46 @@ static inline void move_ins_before_phis(MachineInstr &MI) {
MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
} else {
- for (auto* pred_MBB : MBB.predecessors())
- {
+ auto phi = BuildMI(MBB, first_phi, MI.getDebugLoc(), TII.get(AMDGPU::PHI),
+ MI.getOperand(0).getReg());
+ for (auto *pred_MBB : MBB.predecessors()) {
+ Register cloned_reg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
- if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &MBB) {
- MachineInstr* cloned_MI = MF.CloneMachineInstr(&MI);
- pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
- cloned_MI->bundleWithPred();
- }
+ MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI);
+ cloned_MI->getOperand(0).setReg(cloned_reg);
+ phi.addReg(cloned_reg).addMBB(pred_MBB);
+ pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
+ cloned_MI->bundleWithPred();
}
MI.eraseFromParent();
}
}
-static inline MachineBasicBlock::instr_iterator
+struct Epilog_Iterator {
+ MachineBasicBlock::instr_iterator internal_it;
+ Epilog_Iterator(MachineBasicBlock::instr_iterator i) : internal_it(i) {}
+
+ bool operator==(const Epilog_Iterator &other) {
+ return internal_it == other.internal_it;
+ }
+ bool isEnd() { return internal_it.isEnd(); }
+ MachineInstr &operator*() { return *internal_it; }
+ MachineBasicBlock::instr_iterator operator->() { return internal_it; }
+ Epilog_Iterator &operator++() {
+ ++internal_it;
+ if (!internal_it.isEnd() && internal_it->isBranch())
+ internal_it = internal_it->getParent()->instr_end();
+ return *this;
+ }
+ Epilog_Iterator operator++(int ignored) {
+ Epilog_Iterator to_return = *this;
+ ++*this;
+ return to_return;
+ }
+
+};
+
+static inline Epilog_Iterator
get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) {
MachineFunction& MF = *pred_MBB.getParent();
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
@@ -69,6 +98,27 @@ get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MB
llvm_unreachable("There should always be a branch to succ_MBB.");
}
+static inline bool epilogs_are_identical(const vector<MachineInstr *> left,
+ const vector<MachineInstr *> right,
+ const MachineBasicBlock &succ_MBB) {
+ if (left.size() != right.size())
+ return false;
+
+ for (unsigned i = 0; i < left.size(); i++)
+ if (!left[i]->isIdenticalTo(*right[i]))
+ return false;
+ return true;
+}
+
+static inline void move_body(vector<MachineInstr *> &body,
+ MachineBasicBlock &dest_MBB) {
+ for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
+ MachineInstr &body_ins = **rev_it;
+ body_ins.removeFromBundle();
+ dest_MBB.insert(dest_MBB.begin(), &body_ins);
+ }
+}
+
static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
@@ -78,26 +128,6 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
vector<MachineInstr*> body;
};
- auto epilogs_are_identical = [](const vector<MachineInstr *> left,
- const vector<MachineInstr *> right) {
- if (left.size() != right.size())
- return false;
-
- for (unsigned i = 0; i < left.size(); i++)
- if (!left[i]->isIdenticalTo(*right[i]))
- return false;
- return true;
- };
-
- auto move_body = [](vector<MachineInstr *> &body,
- MachineBasicBlock &dest_MBB) {
- for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
- MachineInstr &body_ins = **rev_it;
- body_ins.removeFromBundle();
- dest_MBB.insert(dest_MBB.begin(), &body_ins);
- }
- };
-
vector<CFG_Rewrite_Entry> cfg_rewrite_entries;
for (MachineBasicBlock &MBB : MF) {
CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
@@ -109,7 +139,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
while (!ep_it.isEnd())
epilog.push_back(&*ep_it++);
- if (!epilogs_are_identical(to_insert.body, epilog)) {
+ if (!epilogs_are_identical(to_insert.body, epilog, MBB)) {
if (to_insert.pred_MBBs.size() && to_insert.body.size()) {
// Potentially, we need to insert a new entry. But first see if we
// can find an existing entry with the same epilog.
@@ -117,7 +147,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
for (auto rev_it = cfg_rewrite_entries.rbegin();
rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB;
rev_it++)
- if (epilogs_are_identical(rev_it->body, epilog)) {
+ if (epilogs_are_identical(rev_it->body, epilog, MBB)) {
rev_it->pred_MBBs.insert(pred_MBB);
existing_entry_found = true;
break;
@@ -168,3 +198,43 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
}
}
}
+
+namespace std {
+ template <>
+ struct hash<Register>
+ {
+ std::size_t operator()(const Register& r) const
+ {
+ return hash<unsigned>()(r);
+ }
+ };
+}
+
+static inline void hoist_unrelated_copies(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &branch_MI : MBB) {
+ if (!branch_MI.isBranch())
+ continue;
+
+ unordered_set<Register> related_copy_sources;
+ Epilog_Iterator epilog_it = branch_MI.getIterator();
+ Epilog_Iterator copy_move_it = ++epilog_it;
+ while (!epilog_it.isEnd()) {
+ if (epilog_it->getOpcode() != AMDGPU::COPY)
+ related_copy_sources.insert(epilog_it->getOperand(0).getReg());
+ ++epilog_it;
+ }
+
+ while (!copy_move_it.isEnd()) {
+ Epilog_Iterator next = copy_move_it; ++next;
+ if (copy_move_it->getOpcode() == AMDGPU::COPY &&
+ !related_copy_sources.count(copy_move_it->getOperand(1).getReg())) {
+ MachineInstr &MI_to_move = *copy_move_it;
+ MI_to_move.removeFromBundle();
+ MBB.insert(branch_MI.getIterator(),&MI_to_move);
+ }
+
+ copy_move_it = next;
+ }
+ }
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index ea3d2ed61a808..c6b8ace9d9718 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -851,6 +851,12 @@ bool SILowerControlFlow::run(MachineFunction &MF) {
LoweredIf.clear();
KillBlocks.clear();
+ if (Changed)
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.isBundled())
+ MI.unbundleFromSucc();
+
return Changed;
}
>From 89db2df158c33b463e1bf9859d934cc5ec401973 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 18 Sep 2025 03:12:03 -0500
Subject: [PATCH 6/7] Works on simple testcase
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 ++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 ++
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 14 +++---
.../Target/AMDGPU/SIRestoreNormalEpilog.cpp | 46 +++++++++++++++++++
5 files changed, 60 insertions(+), 7 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 0f2c33585884f..f402efb2ea86e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -248,6 +248,9 @@ extern char &AMDGPUPreloadKernArgPrologLegacyID;
void initializeAMDGPUPreloadKernelArgumentsLegacyPass(PassRegistry &);
extern char &AMDGPUPreloadKernelArgumentsLegacyID;
+void initializeSIRestoreNormalEpilogLegacyPass(PassRegistry &);
+extern char &SIRestoreNormalEpilogLegacyID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fa454730364c4..8210ad88b3d3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -597,6 +597,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesLegacyPass(*PR);
+ initializeSIRestoreNormalEpilogLegacyPass(*PR);
initializeSIPostRABundlerLegacyPass(*PR);
initializeGCNCreateVOPDLegacyPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
@@ -1595,6 +1596,8 @@ void GCNPassConfig::addOptimizedRegAlloc() {
if (EnableRewritePartialRegUses)
insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
+
+ insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID);
if (isPassEnabled(EnablePreRAOptimizations))
insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index aae56eef73edd..996cb635b3c9c 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -183,6 +183,7 @@ add_llvm_target(AMDGPUCodeGen
SIPreEmitPeephole.cpp
SIProgramInfo.cpp
SIRegisterInfo.cpp
+ SIRestoreNormalEpilog.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 18888975bdba0..0f33a10ed8f33 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -91,9 +91,7 @@ get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MB
for (MachineInstr &branch_MI : reverse(pred_MBB.instrs()))
if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
- return std::next(branch_MI.getIterator());
- else
- assert(branch_MI.isBranch() && "Shouldn't have fall-throughs here.");
+ return ++Epilog_Iterator(branch_MI.getIterator());
llvm_unreachable("There should always be a branch to succ_MBB.");
}
@@ -132,8 +130,8 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
for (MachineBasicBlock *pred_MBB : MBB.predecessors()) {
- MachineBasicBlock::instr_iterator ep_it =
- get_epilog_for_successor(*pred_MBB, MBB)->getIterator();
+ Epilog_Iterator ep_it =
+ get_epilog_for_successor(*pred_MBB, MBB);
vector<MachineInstr *> epilog;
while (!ep_it.isEnd())
@@ -175,6 +173,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
// Perform the journaled rewrites.
for (auto &entry : cfg_rewrite_entries) {
MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
+ MF.insert(MF.end(),mezzanine_MBB);
// Deal with mezzanine to successor succession.
BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB);
@@ -192,7 +191,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB);
// Delete instructions that were lowered from epilog
- auto epilog_it = get_epilog_for_successor(*pred_MBB, *entry.succ_MBB);
+ auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator());
while (!epilog_it.isEnd())
epilog_it++->eraseFromBundle();
}
@@ -228,7 +227,8 @@ static inline void hoist_unrelated_copies(MachineFunction &MF) {
while (!copy_move_it.isEnd()) {
Epilog_Iterator next = copy_move_it; ++next;
if (copy_move_it->getOpcode() == AMDGPU::COPY &&
- !related_copy_sources.count(copy_move_it->getOperand(1).getReg())) {
+ !related_copy_sources.count(copy_move_it->getOperand(1).getReg())
+ || copy_move_it->getOpcode() == AMDGPU::IMPLICIT_DEF) {
MachineInstr &MI_to_move = *copy_move_it;
MI_to_move.removeFromBundle();
MBB.insert(branch_MI.getIterator(),&MI_to_move);
diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
new file mode 100644
index 0000000000000..ef565a9d9ad8f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
@@ -0,0 +1,46 @@
+#include "SICustomBranchBundles.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-restore-normal-epilog"
+
+namespace
+{
+
+class SIRestoreNormalEpilogLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ hoist_unrelated_copies(MF);
+ normalize_ir_post_phi_elimination(MF);
+ return true;
+ }
+
+ StringRef getPassName() const override {
+ return "SI Restore Normal Epilog Post PHI Elimination";
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setNoPHIs();
+ }
+
+};
+
+} // namespace
+
+INITIALIZE_PASS(SIRestoreNormalEpilogLegacy, DEBUG_TYPE,
+ "SI restore normal epilog", false, false)
+
+char SIRestoreNormalEpilogLegacy::ID;
+char &llvm::SIRestoreNormalEpilogLegacyID = SIRestoreNormalEpilogLegacy::ID;
>From 4d4f71aee04526c30a8cf761f42cfa20e55882de Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 18 Sep 2025 06:31:57 -0500
Subject: [PATCH 7/7] Must delete from all predecessors if lowering.
---
llvm/lib/Target/AMDGPU/SICustomBranchBundles.h | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 0f33a10ed8f33..0e043b09fa136 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -163,9 +163,18 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
// Handle the last potential rewrite entry. Lower instead of journaling a
// rewrite entry if all predecessor MBBs are in this single entry.
- if (to_insert.pred_MBBs.size() == MBB.pred_size())
- // Lower
- move_body(to_insert.body,MBB);
+ if (to_insert.pred_MBBs.size() == MBB.pred_size()) {
+ move_body(to_insert.body, MBB);
+ for (MachineBasicBlock *pred_MBB : to_insert.pred_MBBs) {
+ // Delete instructions that were lowered from epilog
+ MachineInstr &branch_ins =
+ get_branch_with_dest(*pred_MBB, *to_insert.succ_MBB);
+ auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator());
+ while (!epilog_it.isEnd())
+ epilog_it++->eraseFromBundle();
+ }
+
+ }
else if (to_insert.body.size())
cfg_rewrite_entries.push_back(to_insert);
}
More information about the llvm-commits
mailing list