[llvm] Move SI Lower Control Flow Up (PR #159557)
Patrick Simmons via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 9 12:30:25 PDT 2025
https://github.com/linuxrocks123 updated https://github.com/llvm/llvm-project/pull/159557
>From fc0b71dd14825c2ff2af0ee14e2026f2bb257d7e Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Mon, 15 Sep 2025 08:35:00 -0500
Subject: [PATCH 1/9] In-progress work
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 144 ++++++++++++++++++
1 file changed, 144 insertions(+)
create mode 100644 llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
new file mode 100644
index 0000000000000..fc96c4370879e
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -0,0 +1,144 @@
+#pragma once
+
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "SIInstrInfo.h"
+
+#include <cassert>
+#include <unordered_set>
+
+using namespace llvm;
+
+using std::unordered_set;
+using std::vector;
+
+static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MBB,
+ MachineBasicBlock &dest_MBB) {
+ auto& TII = *branching_MBB.getParent()->getSubtarget<GCNSubtarget>().getInstrInfo();
+ for (MachineInstr &branch_MI : reverse(branching_MBB.instrs()))
+ if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &dest_MBB)
+ return branch_MI;
+
+ llvm_unreachable("Don't call this if there's no branch to the destination.");
+}
+
+static inline void move_ins_before_phis(MachineInstr &MI,
+ MachineBasicBlock &MBB) {
+ MachineFunction& MF = *MBB.getParent();
+ auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ if (MBB.size() == 0 || MBB.front().getOpcode() != AMDGPU::PHI /*do we need to check for debug instructions here?*/)
+ MBB.insert(MBB.begin(), &MI);
+ else
+ for (auto* pred_MBB : MBB.predecessors())
+ {
+ MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
+ if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &MBB) {
+ MachineInstr* cloned_MI = MF.CloneMachineInstr(&MI);
+ pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
+ cloned_MI->bundleWithPred();
+ }
+ }
+
+ MI.eraseFromParent();
+}
+
+static inline MachineBasicBlock::instr_iterator
+get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) {
+ MachineFunction& MF = *pred_MBB.getParent();
+ auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ for (MachineInstr &branch_MI : reverse(pred_MBB.instrs()))
+ if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
+ return std::next(branch_MI.getIterator());
+ else
+ assert(branch_MI.isBranch() && "Shouldn't have fall-throughs here.");
+
+ llvm_unreachable("There should always be a branch to succ_MBB.");
+}
+
+static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
+ auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+ struct CFG_Rewrite_Entry {
+ unordered_set<MachineBasicBlock *> pred_MBBs;
+ MachineBasicBlock *succ_MBB;
+ vector<MachineInstr*> body;
+ };
+
+ auto epilogs_are_identical = [](const vector<MachineInstr *> left,
+ const vector<MachineInstr *> right) {
+ if (left.size() != right.size())
+ return false;
+
+ for (unsigned i = 0; i < left.size(); i++)
+ if (!left[i]->isIdenticalTo(*right[i]))
+ return false;
+ return true;
+ };
+
+ auto move_body = [](vector<MachineInstr *> &body,
+ MachineBasicBlock &dest_MBB) {
+ for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
+ MachineInstr &body_ins = **rev_it;
+ body_ins.removeFromBundle();
+ dest_MBB.insert(dest_MBB.begin(), &body_ins);
+ }
+ };
+
+ vector<CFG_Rewrite_Entry> cfg_rewrite_entries;
+ for (MachineBasicBlock &MBB : MF) {
+ CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
+ for (MachineBasicBlock *pred_MBB : MBB.predecessors()) {
+ MachineBasicBlock::instr_iterator ep_it =
+ get_epilog_for_successor(*pred_MBB, MBB)->getIterator();
+
+ vector<MachineInstr *> epilog;
+ while (!ep_it.isEnd())
+ epilog.push_back(&*ep_it++);
+
+ if (!epilogs_are_identical(to_insert.body, epilog)) {
+ if (to_insert.pred_MBBs.size() && to_insert.body.size()) {
+ // Potentially, we need to insert a new entry. But first see if we
+ // can find an existing entry with the same epilog.
+ bool existing_entry_found = false;
+ for (auto rev_it = cfg_rewrite_entries.rbegin();
+ rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB;
+ rev_it++)
+ if (epilogs_are_identical(rev_it->body, epilog)) {
+ rev_it->pred_MBBs.insert(pred_MBB);
+ existing_entry_found = true;
+ break;
+ }
+
+ if(!existing_entry_found)
+ cfg_rewrite_entries.push_back(to_insert);
+ }
+ to_insert.pred_MBBs.clear();
+ to_insert.body = epilog;
+ }
+
+ to_insert.pred_MBBs.insert(pred_MBB);
+ }
+
+ // Handle the last potential rewrite entry. Lower instead of journaling a
+ // rewrite entry if all predecessor MBBs are in this single entry.
+ if (to_insert.pred_MBBs.size() == MBB.pred_size())
+ // Lower
+ move_body(to_insert.body,MBB);
+ else if (to_insert.body.size())
+ cfg_rewrite_entries.push_back(to_insert);
+
+ // Perform the journaled rewrites.
+ for (const auto &entry : cfg_rewrite_entries) {
+ MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
+ for(MachineBasicBlock* pred_MBB : entry.pred_MBBs)
+ {
+
+ }
+ }
+}
>From 98ef49eb4e5feb9820d96225d1bb7bcb7e692cfc Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 16 Sep 2025 05:01:32 -0500
Subject: [PATCH 2/9] Finish initial implementation ... I hope.
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 41 ++++++++++++++-----
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 3 ++
2 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index fc96c4370879e..81872f4fe8ab4 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -26,14 +26,17 @@ static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MB
llvm_unreachable("Don't call this if there's no branch to the destination.");
}
-static inline void move_ins_before_phis(MachineInstr &MI,
- MachineBasicBlock &MBB) {
+static inline void move_ins_before_phis(MachineInstr &MI) {
+ MachineBasicBlock& MBB = *MI.getParent();
MachineFunction& MF = *MBB.getParent();
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- if (MBB.size() == 0 || MBB.front().getOpcode() != AMDGPU::PHI /*do we need to check for debug instructions here?*/)
+ if (MBB.size() == 0 ||
+ MBB.front().getOpcode() !=
+ AMDGPU::PHI /*do we need to check for debug instructions here?*/) {
+ MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
- else
+ } else {
for (auto* pred_MBB : MBB.predecessors())
{
MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
@@ -43,8 +46,8 @@ static inline void move_ins_before_phis(MachineInstr &MI,
cloned_MI->bundleWithPred();
}
}
-
- MI.eraseFromParent();
+ MI.eraseFromParent();
+ }
}
static inline MachineBasicBlock::instr_iterator
@@ -132,13 +135,31 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
move_body(to_insert.body,MBB);
else if (to_insert.body.size())
cfg_rewrite_entries.push_back(to_insert);
+ }
// Perform the journaled rewrites.
- for (const auto &entry : cfg_rewrite_entries) {
+ for (auto &entry : cfg_rewrite_entries) {
MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
- for(MachineBasicBlock* pred_MBB : entry.pred_MBBs)
- {
-
+
+ // Deal with mezzanine to successor succession.
+ BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB);
+ mezzanine_MBB->addSuccessor(entry.succ_MBB);
+
+ // Move instructions to mezzanine block.
+ move_body(entry.body, *mezzanine_MBB);
+
+ for (MachineBasicBlock *pred_MBB : entry.pred_MBBs) {
+ //Deal with predecessor to mezzanine succession.
+ MachineInstr &branch_ins =
+ get_branch_with_dest(*pred_MBB, *entry.succ_MBB);
+ assert(branch_ins.getOperand(0).isMBB() && "Branch instruction isn't.");
+ branch_ins.getOperand(0).setMBB(mezzanine_MBB);
+ pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB);
+
+ // Delete instructions that were lowered from epilog
+ auto epilog_it = get_epilog_for_successor(*pred_MBB, *entry.succ_MBB);
+ while (!epilog_it.isEnd())
+ epilog_it++->eraseFromBundle();
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 8586d6c18b361..174cb77d57ff8 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -48,6 +48,7 @@
/// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits
//===----------------------------------------------------------------------===//
+#include "SICustomBranchBundles.h"
#include "SILowerControlFlow.h"
#include "AMDGPU.h"
#include "AMDGPULaneMaskUtils.h"
@@ -322,6 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
+ move_ins_before_phis(*OrSaveExec);
+
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
MachineBasicBlock::iterator ElsePt(MI);
>From a4497c51294c22379ed8cbe46c45c2d4684f6f21 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 16 Sep 2025 05:39:36 -0500
Subject: [PATCH 3/9] Now to write the pass to get rid of the bundles
---
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 +++++---
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 8 ++++++++
llvm/lib/Target/AMDGPU/SILowerControlFlow.h | 8 ++++++++
3 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 723d07e8ea079..672c92113f59b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1557,7 +1557,7 @@ void GCNPassConfig::addFastRegAlloc() {
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
+ //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
@@ -1580,10 +1580,12 @@ void GCNPassConfig::addOptimizedRegAlloc() {
if (OptVGPRLiveRange)
insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID);
+ insertPass(&SIOptimizeVGPRLiveRangeLegacyID, &SILowerControlFlowLegacyID);
+
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
+ //insertPass(&PHIEliminationID, &SILowerControlFlowLegacyID);
if (EnableRewritePartialRegUses)
insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
@@ -2241,7 +2243,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass<PHIEliminationPass>(SILowerControlFlowPass());
+ //insertPass<PHIEliminationPass>(SILowerControlFlowPass());
if (EnableRewritePartialRegUses)
insertPass<RenameIndependentSubregsPass>(GCNRewritePartialRegUsesPass());
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 174cb77d57ff8..e412869e8b643 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -153,6 +153,14 @@ class SILowerControlFlowLegacy : public MachineFunctionPass {
return "SI Lower control flow pseudo instructions";
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setIsSSA();
+ }
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties().setNoPHIs();
+ }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
// Should preserve the same set that TwoAddressInstructions does.
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h
index 23803c679c246..0f4df79952999 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.h
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.h
@@ -16,6 +16,14 @@ class SILowerControlFlowPass : public PassInfoMixin<SILowerControlFlowPass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
+
+ MachineFunctionProperties getRequiredProperties() const {
+ return MachineFunctionProperties().setIsSSA();
+ }
+
+ MachineFunctionProperties getClearedProperties() const {
+ return MachineFunctionProperties().setNoPHIs();
+ }
};
} // namespace llvm
>From d378e9945466ec1bc2016394742a50ae58dee37c Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 16 Sep 2025 06:07:26 -0500
Subject: [PATCH 4/9] For my next act, I'll actually make this SSA form by
creating different VREGs in the hoisted instructions and merging them with
PHI.
---
llvm/lib/Target/AMDGPU/SICustomBranchBundles.h | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 81872f4fe8ab4..51d1e84a47c4c 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -31,9 +31,14 @@ static inline void move_ins_before_phis(MachineInstr &MI) {
MachineFunction& MF = *MBB.getParent();
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- if (MBB.size() == 0 ||
- MBB.front().getOpcode() !=
- AMDGPU::PHI /*do we need to check for debug instructions here?*/) {
+ bool phi_seen = false;
+ for (MachineInstr &maybe_phi : MBB)
+ if (maybe_phi.getOpcode() == AMDGPU::PHI) {
+ phi_seen = true;
+ break;
+ }
+
+ if (!phi_seen) {
MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
} else {
>From 7d2b970e09029ac261a199ee196d2db9a1e9b839 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Wed, 17 Sep 2025 06:36:07 -0500
Subject: [PATCH 5/9] More work
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 136 +++++++++++++-----
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 6 +
2 files changed, 109 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 51d1e84a47c4c..18888975bdba0 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -4,6 +4,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/ErrorHandling.h"
#include "SIInstrInfo.h"
@@ -29,11 +30,13 @@ static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MB
static inline void move_ins_before_phis(MachineInstr &MI) {
MachineBasicBlock& MBB = *MI.getParent();
MachineFunction& MF = *MBB.getParent();
- auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ auto& MRI = MF.getRegInfo();
bool phi_seen = false;
- for (MachineInstr &maybe_phi : MBB)
- if (maybe_phi.getOpcode() == AMDGPU::PHI) {
+ MachineBasicBlock::iterator first_phi;
+ for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++)
+ if (first_phi->getOpcode() == AMDGPU::PHI) {
phi_seen = true;
break;
}
@@ -42,20 +45,46 @@ static inline void move_ins_before_phis(MachineInstr &MI) {
MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
} else {
- for (auto* pred_MBB : MBB.predecessors())
- {
+ auto phi = BuildMI(MBB, first_phi, MI.getDebugLoc(), TII.get(AMDGPU::PHI),
+ MI.getOperand(0).getReg());
+ for (auto *pred_MBB : MBB.predecessors()) {
+ Register cloned_reg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
- if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &MBB) {
- MachineInstr* cloned_MI = MF.CloneMachineInstr(&MI);
- pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
- cloned_MI->bundleWithPred();
- }
+ MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI);
+ cloned_MI->getOperand(0).setReg(cloned_reg);
+ phi.addReg(cloned_reg).addMBB(pred_MBB);
+ pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
+ cloned_MI->bundleWithPred();
}
MI.eraseFromParent();
}
}
-static inline MachineBasicBlock::instr_iterator
+struct Epilog_Iterator {
+ MachineBasicBlock::instr_iterator internal_it;
+ Epilog_Iterator(MachineBasicBlock::instr_iterator i) : internal_it(i) {}
+
+ bool operator==(const Epilog_Iterator &other) {
+ return internal_it == other.internal_it;
+ }
+ bool isEnd() { return internal_it.isEnd(); }
+ MachineInstr &operator*() { return *internal_it; }
+ MachineBasicBlock::instr_iterator operator->() { return internal_it; }
+ Epilog_Iterator &operator++() {
+ ++internal_it;
+ if (!internal_it.isEnd() && internal_it->isBranch())
+ internal_it = internal_it->getParent()->instr_end();
+ return *this;
+ }
+ Epilog_Iterator operator++(int ignored) {
+ Epilog_Iterator to_return = *this;
+ ++*this;
+ return to_return;
+ }
+
+};
+
+static inline Epilog_Iterator
get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) {
MachineFunction& MF = *pred_MBB.getParent();
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
@@ -69,6 +98,27 @@ get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MB
llvm_unreachable("There should always be a branch to succ_MBB.");
}
+static inline bool epilogs_are_identical(const vector<MachineInstr *> left,
+ const vector<MachineInstr *> right,
+ const MachineBasicBlock &succ_MBB) {
+ if (left.size() != right.size())
+ return false;
+
+ for (unsigned i = 0; i < left.size(); i++)
+ if (!left[i]->isIdenticalTo(*right[i]))
+ return false;
+ return true;
+}
+
+static inline void move_body(vector<MachineInstr *> &body,
+ MachineBasicBlock &dest_MBB) {
+ for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
+ MachineInstr &body_ins = **rev_it;
+ body_ins.removeFromBundle();
+ dest_MBB.insert(dest_MBB.begin(), &body_ins);
+ }
+}
+
static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
@@ -78,26 +128,6 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
vector<MachineInstr*> body;
};
- auto epilogs_are_identical = [](const vector<MachineInstr *> left,
- const vector<MachineInstr *> right) {
- if (left.size() != right.size())
- return false;
-
- for (unsigned i = 0; i < left.size(); i++)
- if (!left[i]->isIdenticalTo(*right[i]))
- return false;
- return true;
- };
-
- auto move_body = [](vector<MachineInstr *> &body,
- MachineBasicBlock &dest_MBB) {
- for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
- MachineInstr &body_ins = **rev_it;
- body_ins.removeFromBundle();
- dest_MBB.insert(dest_MBB.begin(), &body_ins);
- }
- };
-
vector<CFG_Rewrite_Entry> cfg_rewrite_entries;
for (MachineBasicBlock &MBB : MF) {
CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
@@ -109,7 +139,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
while (!ep_it.isEnd())
epilog.push_back(&*ep_it++);
- if (!epilogs_are_identical(to_insert.body, epilog)) {
+ if (!epilogs_are_identical(to_insert.body, epilog, MBB)) {
if (to_insert.pred_MBBs.size() && to_insert.body.size()) {
// Potentially, we need to insert a new entry. But first see if we
// can find an existing entry with the same epilog.
@@ -117,7 +147,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
for (auto rev_it = cfg_rewrite_entries.rbegin();
rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB;
rev_it++)
- if (epilogs_are_identical(rev_it->body, epilog)) {
+ if (epilogs_are_identical(rev_it->body, epilog, MBB)) {
rev_it->pred_MBBs.insert(pred_MBB);
existing_entry_found = true;
break;
@@ -168,3 +198,43 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
}
}
}
+
+namespace std {
+ template <>
+ struct hash<Register>
+ {
+ std::size_t operator()(const Register& r) const
+ {
+ return hash<unsigned>()(r);
+ }
+ };
+}
+
+static inline void hoist_unrelated_copies(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &branch_MI : MBB) {
+ if (!branch_MI.isBranch())
+ continue;
+
+ unordered_set<Register> related_copy_sources;
+ Epilog_Iterator epilog_it = branch_MI.getIterator();
+ Epilog_Iterator copy_move_it = ++epilog_it;
+ while (!epilog_it.isEnd()) {
+ if (epilog_it->getOpcode() != AMDGPU::COPY)
+ related_copy_sources.insert(epilog_it->getOperand(0).getReg());
+ ++epilog_it;
+ }
+
+ while (!copy_move_it.isEnd()) {
+ Epilog_Iterator next = copy_move_it; ++next;
+ if (copy_move_it->getOpcode() == AMDGPU::COPY &&
+ !related_copy_sources.count(copy_move_it->getOperand(1).getReg())) {
+ MachineInstr &MI_to_move = *copy_move_it;
+ MI_to_move.removeFromBundle();
+ MBB.insert(branch_MI.getIterator(),&MI_to_move);
+ }
+
+ copy_move_it = next;
+ }
+ }
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index e412869e8b643..f069d48c25ec6 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -850,6 +850,12 @@ bool SILowerControlFlow::run(MachineFunction &MF) {
LoweredIf.clear();
KillBlocks.clear();
+ if (Changed)
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (MI.isBundled())
+ MI.unbundleFromSucc();
+
return Changed;
}
>From 4492864fa842512ac67b08b7556ad31a5d61f231 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 18 Sep 2025 03:12:03 -0500
Subject: [PATCH 6/9] Works on simple testcase
---
llvm/lib/Target/AMDGPU/AMDGPU.h | 3 ++
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 ++
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 14 +++---
.../Target/AMDGPU/SIRestoreNormalEpilog.cpp | 46 +++++++++++++++++++
5 files changed, 60 insertions(+), 7 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 0f2c33585884f..f402efb2ea86e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -248,6 +248,9 @@ extern char &AMDGPUPreloadKernArgPrologLegacyID;
void initializeAMDGPUPreloadKernelArgumentsLegacyPass(PassRegistry &);
extern char &AMDGPUPreloadKernelArgumentsLegacyID;
+void initializeSIRestoreNormalEpilogLegacyPass(PassRegistry &);
+extern char &SIRestoreNormalEpilogLegacyID;
+
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 672c92113f59b..75f24e55592ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -597,6 +597,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesLegacyPass(*PR);
+ initializeSIRestoreNormalEpilogLegacyPass(*PR);
initializeSIPostRABundlerLegacyPass(*PR);
initializeGCNCreateVOPDLegacyPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
@@ -1589,6 +1590,8 @@ void GCNPassConfig::addOptimizedRegAlloc() {
if (EnableRewritePartialRegUses)
insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
+
+ insertPass(&RenameIndependentSubregsID,&SIRestoreNormalEpilogLegacyID);
if (isPassEnabled(EnablePreRAOptimizations))
insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index aae56eef73edd..996cb635b3c9c 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -183,6 +183,7 @@ add_llvm_target(AMDGPUCodeGen
SIPreEmitPeephole.cpp
SIProgramInfo.cpp
SIRegisterInfo.cpp
+ SIRestoreNormalEpilog.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 18888975bdba0..0f33a10ed8f33 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -91,9 +91,7 @@ get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MB
for (MachineInstr &branch_MI : reverse(pred_MBB.instrs()))
if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
- return std::next(branch_MI.getIterator());
- else
- assert(branch_MI.isBranch() && "Shouldn't have fall-throughs here.");
+ return ++Epilog_Iterator(branch_MI.getIterator());
llvm_unreachable("There should always be a branch to succ_MBB.");
}
@@ -132,8 +130,8 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
for (MachineBasicBlock *pred_MBB : MBB.predecessors()) {
- MachineBasicBlock::instr_iterator ep_it =
- get_epilog_for_successor(*pred_MBB, MBB)->getIterator();
+ Epilog_Iterator ep_it =
+ get_epilog_for_successor(*pred_MBB, MBB);
vector<MachineInstr *> epilog;
while (!ep_it.isEnd())
@@ -175,6 +173,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
// Perform the journaled rewrites.
for (auto &entry : cfg_rewrite_entries) {
MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
+ MF.insert(MF.end(),mezzanine_MBB);
// Deal with mezzanine to successor succession.
BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB);
@@ -192,7 +191,7 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB);
// Delete instructions that were lowered from epilog
- auto epilog_it = get_epilog_for_successor(*pred_MBB, *entry.succ_MBB);
+ auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator());
while (!epilog_it.isEnd())
epilog_it++->eraseFromBundle();
}
@@ -228,7 +227,8 @@ static inline void hoist_unrelated_copies(MachineFunction &MF) {
while (!copy_move_it.isEnd()) {
Epilog_Iterator next = copy_move_it; ++next;
if (copy_move_it->getOpcode() == AMDGPU::COPY &&
- !related_copy_sources.count(copy_move_it->getOperand(1).getReg())) {
+ !related_copy_sources.count(copy_move_it->getOperand(1).getReg())
+ || copy_move_it->getOpcode() == AMDGPU::IMPLICIT_DEF) {
MachineInstr &MI_to_move = *copy_move_it;
MI_to_move.removeFromBundle();
MBB.insert(branch_MI.getIterator(),&MI_to_move);
diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
new file mode 100644
index 0000000000000..ef565a9d9ad8f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
@@ -0,0 +1,46 @@
+#include "SICustomBranchBundles.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "si-restore-normal-epilog"
+
+namespace
+{
+
+class SIRestoreNormalEpilogLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ hoist_unrelated_copies(MF);
+ normalize_ir_post_phi_elimination(MF);
+ return true;
+ }
+
+ StringRef getPassName() const override {
+ return "SI Restore Normal Epilog Post PHI Elimination";
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().setNoPHIs();
+ }
+
+};
+
+} // namespace
+
+INITIALIZE_PASS(SIRestoreNormalEpilogLegacy, DEBUG_TYPE,
+ "SI restore normal epilog", false, false)
+
+char SIRestoreNormalEpilogLegacy::ID;
+char &llvm::SIRestoreNormalEpilogLegacyID = SIRestoreNormalEpilogLegacy::ID;
>From 26250941bafacb3c53242924f78127ccdf1a9a12 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 18 Sep 2025 06:31:57 -0500
Subject: [PATCH 7/9] Must delete from all predecessors if lowering.
---
llvm/lib/Target/AMDGPU/SICustomBranchBundles.h | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 0f33a10ed8f33..0e043b09fa136 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -163,9 +163,18 @@ static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
// Handle the last potential rewrite entry. Lower instead of journaling a
// rewrite entry if all predecessor MBBs are in this single entry.
- if (to_insert.pred_MBBs.size() == MBB.pred_size())
- // Lower
- move_body(to_insert.body,MBB);
+ if (to_insert.pred_MBBs.size() == MBB.pred_size()) {
+ move_body(to_insert.body, MBB);
+ for (MachineBasicBlock *pred_MBB : to_insert.pred_MBBs) {
+ // Delete instructions that were lowered from epilog
+ MachineInstr &branch_ins =
+ get_branch_with_dest(*pred_MBB, *to_insert.succ_MBB);
+ auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator());
+ while (!epilog_it.isEnd())
+ epilog_it++->eraseFromBundle();
+ }
+
+ }
else if (to_insert.body.size())
cfg_rewrite_entries.push_back(to_insert);
}
>From 76b784d4a4891233046f7f5d5fcbb799d972d099 Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Tue, 23 Sep 2025 01:30:40 -0500
Subject: [PATCH 8/9] clang-format and camelcaser
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 292 +++++++++---------
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 2 +-
.../Target/AMDGPU/SIRestoreNormalEpilog.cpp | 4 +-
3 files changed, 147 insertions(+), 151 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index 0e043b09fa136..acb547236b1eb 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -17,233 +17,229 @@ using namespace llvm;
using std::unordered_set;
using std::vector;
-static inline MachineInstr &get_branch_with_dest(MachineBasicBlock &branching_MBB,
- MachineBasicBlock &dest_MBB) {
- auto& TII = *branching_MBB.getParent()->getSubtarget<GCNSubtarget>().getInstrInfo();
- for (MachineInstr &branch_MI : reverse(branching_MBB.instrs()))
- if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &dest_MBB)
- return branch_MI;
+static inline MachineInstr &getBranchWithDest(MachineBasicBlock &BranchingMBB,
+ MachineBasicBlock &DestMBB) {
+ auto &TII =
+ *BranchingMBB.getParent()->getSubtarget<GCNSubtarget>().getInstrInfo();
+ for (MachineInstr &BranchMI : reverse(BranchingMBB.instrs()))
+ if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &DestMBB)
+ return BranchMI;
llvm_unreachable("Don't call this if there's no branch to the destination.");
}
-static inline void move_ins_before_phis(MachineInstr &MI) {
- MachineBasicBlock& MBB = *MI.getParent();
- MachineFunction& MF = *MBB.getParent();
+static inline void moveInsBeforePhis(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- auto& MRI = MF.getRegInfo();
+ auto &MRI = MF.getRegInfo();
- bool phi_seen = false;
- MachineBasicBlock::iterator first_phi;
- for (first_phi = MBB.begin(); first_phi != MBB.end(); first_phi++)
- if (first_phi->getOpcode() == AMDGPU::PHI) {
- phi_seen = true;
+ bool PhiSeen = false;
+ MachineBasicBlock::iterator FirstPhi;
+ for (FirstPhi = MBB.begin(); FirstPhi != MBB.end(); FirstPhi++)
+ if (FirstPhi->getOpcode() == AMDGPU::PHI) {
+ PhiSeen = true;
break;
}
-
- if (!phi_seen) {
+
+ if (!PhiSeen) {
MI.removeFromParent();
MBB.insert(MBB.begin(), &MI);
} else {
- auto phi = BuildMI(MBB, first_phi, MI.getDebugLoc(), TII.get(AMDGPU::PHI),
- MI.getOperand(0).getReg());
- for (auto *pred_MBB : MBB.predecessors()) {
- Register cloned_reg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
- MachineInstr& branch_MI = get_branch_with_dest(*pred_MBB,MBB);
- MachineInstr *cloned_MI = MF.CloneMachineInstr(&MI);
- cloned_MI->getOperand(0).setReg(cloned_reg);
- phi.addReg(cloned_reg).addMBB(pred_MBB);
- pred_MBB->insertAfterBundle(branch_MI.getIterator(), cloned_MI);
- cloned_MI->bundleWithPred();
+ auto Phi = BuildMI(MBB, FirstPhi, MI.getDebugLoc(), TII.get(AMDGPU::PHI),
+ MI.getOperand(0).getReg());
+ for (auto *PredMBB : MBB.predecessors()) {
+ Register ClonedReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr &BranchMI = getBranchWithDest(*PredMBB, MBB);
+ MachineInstr *ClonedMI = MF.CloneMachineInstr(&MI);
+ ClonedMI->getOperand(0).setReg(ClonedReg);
+ Phi.addReg(ClonedReg).addMBB(PredMBB);
+ PredMBB->insertAfterBundle(BranchMI.getIterator(), ClonedMI);
+ ClonedMI->bundleWithPred();
}
MI.eraseFromParent();
}
}
-struct Epilog_Iterator {
- MachineBasicBlock::instr_iterator internal_it;
- Epilog_Iterator(MachineBasicBlock::instr_iterator i) : internal_it(i) {}
+struct EpilogIterator {
+ MachineBasicBlock::instr_iterator InternalIt;
+ EpilogIterator(MachineBasicBlock::instr_iterator I) : InternalIt(I) {}
- bool operator==(const Epilog_Iterator &other) {
- return internal_it == other.internal_it;
+ bool operator==(const EpilogIterator &Other) {
+ return InternalIt == Other.InternalIt;
}
- bool isEnd() { return internal_it.isEnd(); }
- MachineInstr &operator*() { return *internal_it; }
- MachineBasicBlock::instr_iterator operator->() { return internal_it; }
- Epilog_Iterator &operator++() {
- ++internal_it;
- if (!internal_it.isEnd() && internal_it->isBranch())
- internal_it = internal_it->getParent()->instr_end();
+ bool isEnd() { return InternalIt.isEnd(); }
+ MachineInstr &operator*() { return *InternalIt; }
+ MachineBasicBlock::instr_iterator operator->() { return InternalIt; }
+ EpilogIterator &operator++() {
+ ++InternalIt;
+ if (!InternalIt.isEnd() && InternalIt->isBranch())
+ InternalIt = InternalIt->getParent()->instr_end();
return *this;
}
- Epilog_Iterator operator++(int ignored) {
- Epilog_Iterator to_return = *this;
+ EpilogIterator operator++(int Ignored) {
+ EpilogIterator ToReturn = *this;
++*this;
- return to_return;
+ return ToReturn;
}
-
};
-static inline Epilog_Iterator
-get_epilog_for_successor(MachineBasicBlock& pred_MBB, MachineBasicBlock& succ_MBB) {
- MachineFunction& MF = *pred_MBB.getParent();
- auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+static inline EpilogIterator getEpilogForSuccessor(MachineBasicBlock &PredMBB,
+ MachineBasicBlock &SuccMBB) {
+ MachineFunction &MF = *PredMBB.getParent();
+ auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- for (MachineInstr &branch_MI : reverse(pred_MBB.instrs()))
- if (branch_MI.isBranch() && TII.getBranchDestBlock(branch_MI) == &succ_MBB)
- return ++Epilog_Iterator(branch_MI.getIterator());
+ for (MachineInstr &BranchMI : reverse(PredMBB.instrs()))
+ if (BranchMI.isBranch() && TII.getBranchDestBlock(BranchMI) == &SuccMBB)
+ return ++EpilogIterator(BranchMI.getIterator());
llvm_unreachable("There should always be a branch to succ_MBB.");
}
-static inline bool epilogs_are_identical(const vector<MachineInstr *> left,
- const vector<MachineInstr *> right,
- const MachineBasicBlock &succ_MBB) {
- if (left.size() != right.size())
+static inline bool epilogsAreIdentical(const vector<MachineInstr *> Left,
+ const vector<MachineInstr *> Right,
+ const MachineBasicBlock &SuccMBB) {
+ if (Left.size() != Right.size())
return false;
-
- for (unsigned i = 0; i < left.size(); i++)
- if (!left[i]->isIdenticalTo(*right[i]))
+
+ for (unsigned I = 0; I < Left.size(); I++)
+ if (!Left[I]->isIdenticalTo(*Right[I]))
return false;
return true;
}
-static inline void move_body(vector<MachineInstr *> &body,
- MachineBasicBlock &dest_MBB) {
- for (auto rev_it = body.rbegin(); rev_it != body.rend(); rev_it++) {
- MachineInstr &body_ins = **rev_it;
- body_ins.removeFromBundle();
- dest_MBB.insert(dest_MBB.begin(), &body_ins);
+static inline void moveBody(vector<MachineInstr *> &Body,
+ MachineBasicBlock &DestMBB) {
+ for (auto RevIt = Body.rbegin(); RevIt != Body.rend(); RevIt++) {
+ MachineInstr &BodyIns = **RevIt;
+ BodyIns.removeFromBundle();
+ DestMBB.insert(DestMBB.begin(), &BodyIns);
}
}
-static inline void normalize_ir_post_phi_elimination(MachineFunction &MF) {
- auto& TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+static inline void normalizeIrPostPhiElimination(MachineFunction &MF) {
+ auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
- struct CFG_Rewrite_Entry {
- unordered_set<MachineBasicBlock *> pred_MBBs;
- MachineBasicBlock *succ_MBB;
- vector<MachineInstr*> body;
+ struct CFGRewriteEntry {
+ unordered_set<MachineBasicBlock *> PredMBBs;
+ MachineBasicBlock *SuccMBB;
+ vector<MachineInstr *> Body;
};
- vector<CFG_Rewrite_Entry> cfg_rewrite_entries;
+ vector<CFGRewriteEntry> CfgRewriteEntries;
for (MachineBasicBlock &MBB : MF) {
- CFG_Rewrite_Entry to_insert = {{}, &MBB, {}};
- for (MachineBasicBlock *pred_MBB : MBB.predecessors()) {
- Epilog_Iterator ep_it =
- get_epilog_for_successor(*pred_MBB, MBB);
+ CFGRewriteEntry ToInsert = {{}, &MBB, {}};
+ for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ EpilogIterator EpIt = getEpilogForSuccessor(*PredMBB, MBB);
- vector<MachineInstr *> epilog;
- while (!ep_it.isEnd())
- epilog.push_back(&*ep_it++);
+ vector<MachineInstr *> Epilog;
+ while (!EpIt.isEnd())
+ Epilog.push_back(&*EpIt++);
- if (!epilogs_are_identical(to_insert.body, epilog, MBB)) {
- if (to_insert.pred_MBBs.size() && to_insert.body.size()) {
+ if (!epilogsAreIdentical(ToInsert.Body, Epilog, MBB)) {
+ if (ToInsert.PredMBBs.size() && ToInsert.Body.size()) {
// Potentially, we need to insert a new entry. But first see if we
// can find an existing entry with the same epilog.
- bool existing_entry_found = false;
- for (auto rev_it = cfg_rewrite_entries.rbegin();
- rev_it != cfg_rewrite_entries.rend() && rev_it->succ_MBB == &MBB;
- rev_it++)
- if (epilogs_are_identical(rev_it->body, epilog, MBB)) {
- rev_it->pred_MBBs.insert(pred_MBB);
- existing_entry_found = true;
+ bool ExistingEntryFound = false;
+ for (auto RevIt = CfgRewriteEntries.rbegin();
+ RevIt != CfgRewriteEntries.rend() && RevIt->SuccMBB == &MBB;
+ RevIt++)
+ if (epilogsAreIdentical(RevIt->Body, Epilog, MBB)) {
+ RevIt->PredMBBs.insert(PredMBB);
+ ExistingEntryFound = true;
break;
}
- if(!existing_entry_found)
- cfg_rewrite_entries.push_back(to_insert);
+ if (!ExistingEntryFound)
+ CfgRewriteEntries.push_back(ToInsert);
}
- to_insert.pred_MBBs.clear();
- to_insert.body = epilog;
+ ToInsert.PredMBBs.clear();
+ ToInsert.Body = Epilog;
}
-
- to_insert.pred_MBBs.insert(pred_MBB);
+
+ ToInsert.PredMBBs.insert(PredMBB);
}
// Handle the last potential rewrite entry. Lower instead of journaling a
// rewrite entry if all predecessor MBBs are in this single entry.
- if (to_insert.pred_MBBs.size() == MBB.pred_size()) {
- move_body(to_insert.body, MBB);
- for (MachineBasicBlock *pred_MBB : to_insert.pred_MBBs) {
+ if (ToInsert.PredMBBs.size() == MBB.pred_size()) {
+ moveBody(ToInsert.Body, MBB);
+ for (MachineBasicBlock *PredMBB : ToInsert.PredMBBs) {
// Delete instructions that were lowered from epilog
- MachineInstr &branch_ins =
- get_branch_with_dest(*pred_MBB, *to_insert.succ_MBB);
- auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator());
- while (!epilog_it.isEnd())
- epilog_it++->eraseFromBundle();
+ MachineInstr &BranchIns =
+ getBranchWithDest(*PredMBB, *ToInsert.SuccMBB);
+ auto EpilogIt = ++EpilogIterator(BranchIns.getIterator());
+ while (!EpilogIt.isEnd())
+ EpilogIt++->eraseFromBundle();
}
- }
- else if (to_insert.body.size())
- cfg_rewrite_entries.push_back(to_insert);
+ } else if (ToInsert.Body.size())
+ CfgRewriteEntries.push_back(ToInsert);
}
// Perform the journaled rewrites.
- for (auto &entry : cfg_rewrite_entries) {
- MachineBasicBlock *mezzanine_MBB = MF.CreateMachineBasicBlock();
- MF.insert(MF.end(),mezzanine_MBB);
+ for (auto &Entry : CfgRewriteEntries) {
+ MachineBasicBlock *MezzanineMBB = MF.CreateMachineBasicBlock();
+ MF.insert(MF.end(), MezzanineMBB);
// Deal with mezzanine to successor succession.
- BuildMI(mezzanine_MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH)).addMBB(entry.succ_MBB);
- mezzanine_MBB->addSuccessor(entry.succ_MBB);
+ BuildMI(MezzanineMBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH))
+ .addMBB(Entry.SuccMBB);
+ MezzanineMBB->addSuccessor(Entry.SuccMBB);
// Move instructions to mezzanine block.
- move_body(entry.body, *mezzanine_MBB);
+ moveBody(Entry.Body, *MezzanineMBB);
- for (MachineBasicBlock *pred_MBB : entry.pred_MBBs) {
- //Deal with predecessor to mezzanine succession.
- MachineInstr &branch_ins =
- get_branch_with_dest(*pred_MBB, *entry.succ_MBB);
- assert(branch_ins.getOperand(0).isMBB() && "Branch instruction isn't.");
- branch_ins.getOperand(0).setMBB(mezzanine_MBB);
- pred_MBB->replaceSuccessor(entry.succ_MBB, mezzanine_MBB);
+ for (MachineBasicBlock *PredMBB : Entry.PredMBBs) {
+ // Deal with predecessor to mezzanine succession.
+ MachineInstr &BranchIns = getBranchWithDest(*PredMBB, *Entry.SuccMBB);
+ assert(BranchIns.getOperand(0).isMBB() && "Branch instruction isn't.");
+ BranchIns.getOperand(0).setMBB(MezzanineMBB);
+ PredMBB->replaceSuccessor(Entry.SuccMBB, MezzanineMBB);
// Delete instructions that were lowered from epilog
- auto epilog_it = ++Epilog_Iterator(branch_ins.getIterator());
- while (!epilog_it.isEnd())
- epilog_it++->eraseFromBundle();
+ auto EpilogIt = ++EpilogIterator(BranchIns.getIterator());
+ while (!EpilogIt.isEnd())
+ EpilogIt++->eraseFromBundle();
}
}
}
namespace std {
- template <>
- struct hash<Register>
- {
- std::size_t operator()(const Register& r) const
- {
- return hash<unsigned>()(r);
- }
- };
-}
+template <> struct hash<Register> {
+ std::size_t operator()(const Register &R) const {
+ return hash<unsigned>()(R);
+ }
+};
+} // namespace std
-static inline void hoist_unrelated_copies(MachineFunction &MF) {
+static inline void hoistUnrelatedCopies(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
- for (MachineInstr &branch_MI : MBB) {
- if (!branch_MI.isBranch())
+ for (MachineInstr &BranchMI : MBB) {
+ if (!BranchMI.isBranch())
continue;
- unordered_set<Register> related_copy_sources;
- Epilog_Iterator epilog_it = branch_MI.getIterator();
- Epilog_Iterator copy_move_it = ++epilog_it;
- while (!epilog_it.isEnd()) {
- if (epilog_it->getOpcode() != AMDGPU::COPY)
- related_copy_sources.insert(epilog_it->getOperand(0).getReg());
- ++epilog_it;
+ unordered_set<Register> RelatedCopySources;
+ EpilogIterator EpilogIt = BranchMI.getIterator();
+ EpilogIterator CopyMoveIt = ++EpilogIt;
+ while (!EpilogIt.isEnd()) {
+ if (EpilogIt->getOpcode() != AMDGPU::COPY)
+ RelatedCopySources.insert(EpilogIt->getOperand(0).getReg());
+ ++EpilogIt;
}
- while (!copy_move_it.isEnd()) {
- Epilog_Iterator next = copy_move_it; ++next;
- if (copy_move_it->getOpcode() == AMDGPU::COPY &&
- !related_copy_sources.count(copy_move_it->getOperand(1).getReg())
- || copy_move_it->getOpcode() == AMDGPU::IMPLICIT_DEF) {
- MachineInstr &MI_to_move = *copy_move_it;
- MI_to_move.removeFromBundle();
- MBB.insert(branch_MI.getIterator(),&MI_to_move);
+ while (!CopyMoveIt.isEnd()) {
+ EpilogIterator Next = CopyMoveIt;
+ ++Next;
+ if (CopyMoveIt->getOpcode() == AMDGPU::COPY &&
+ !RelatedCopySources.count(CopyMoveIt->getOperand(1).getReg()) ||
+ CopyMoveIt->getOpcode() == AMDGPU::IMPLICIT_DEF) {
+ MachineInstr &MIToMove = *CopyMoveIt;
+ MIToMove.removeFromBundle();
+ MBB.insert(BranchMI.getIterator(), &MIToMove);
}
-
- copy_move_it = next;
+
+ CopyMoveIt = Next;
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index f069d48c25ec6..7f6c03d29255e 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -331,7 +331,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
- move_ins_before_phis(*OrSaveExec);
+ moveInsBeforePhis(*OrSaveExec);
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
diff --git a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
index ef565a9d9ad8f..d6d02c940731c 100644
--- a/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRestoreNormalEpilog.cpp
@@ -22,8 +22,8 @@ class SIRestoreNormalEpilogLegacy : public MachineFunctionPass {
SIRestoreNormalEpilogLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override {
- hoist_unrelated_copies(MF);
- normalize_ir_post_phi_elimination(MF);
+ hoistUnrelatedCopies(MF);
+ normalizeIrPostPhiElimination(MF);
return true;
}
>From 734b863853bc76086f708b52bb4d85bb729eaa3b Mon Sep 17 00:00:00 2001
From: Patrick Simmons <psimmons at pensando.io>
Date: Thu, 9 Oct 2025 14:27:53 -0500
Subject: [PATCH 9/9] Add unconditional-branch-to-next
---
.../lib/Target/AMDGPU/SICustomBranchBundles.h | 20 +++++++++++++++++--
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 2 +-
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
index acb547236b1eb..03ee6a251f391 100644
--- a/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
+++ b/llvm/lib/Target/AMDGPU/SICustomBranchBundles.h
@@ -231,8 +231,8 @@ static inline void hoistUnrelatedCopies(MachineFunction &MF) {
while (!CopyMoveIt.isEnd()) {
EpilogIterator Next = CopyMoveIt;
++Next;
- if (CopyMoveIt->getOpcode() == AMDGPU::COPY &&
- !RelatedCopySources.count(CopyMoveIt->getOperand(1).getReg()) ||
+ if ((CopyMoveIt->getOpcode() == AMDGPU::COPY &&
+ !RelatedCopySources.count(CopyMoveIt->getOperand(1).getReg())) ||
CopyMoveIt->getOpcode() == AMDGPU::IMPLICIT_DEF) {
MachineInstr &MIToMove = *CopyMoveIt;
MIToMove.removeFromBundle();
@@ -243,3 +243,19 @@ static inline void hoistUnrelatedCopies(MachineFunction &MF) {
}
}
}
+
+static inline bool makeEverySuccessorBeBranchTarget(MachineFunction &MF) {
+ bool Changed = false;
+ auto &TII = *MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.empty() ||
+ (!MBB.back().isUnconditionalBranch() && !MBB.back().isReturn())) {
+ MachineBasicBlock *LayoutSuccessor =
+ &*std::next(MachineFunction::iterator(MBB));
+ BuildMI(&MBB, DebugLoc(), TII.get(AMDGPU::S_BRANCH))
+ .addMBB(LayoutSuccessor);
+ Changed = true;
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 7f6c03d29255e..64d46c5e401de 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -800,7 +800,7 @@ bool SILowerControlFlow::run(MachineFunction &MF) {
}
}
- bool Changed = false;
+ bool Changed = makeEverySuccessorBeBranchTarget(MF);
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin();
BI != MF.end(); BI = NextBB) {
More information about the llvm-commits
mailing list