[llvm] [AMDGPU] Only insert intrinsic declarations when needed (PR #117998)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 28 04:52:12 PST 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/117998
>From 5a875e65796767daad872a4e69c53edaceacf237 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 28 Nov 2024 11:59:15 +0000
Subject: [PATCH 1/2] [AMDGPU] Only insert intrinsic declarations when needed
---
.../Target/AMDGPU/SIAnnotateControlFlow.cpp | 74 ++++++++++---------
1 file changed, 39 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index ea653490f1bf37..0e0facea22b37a 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -38,6 +38,7 @@ using StackVector = SmallVector<StackEntry, 16>;
class SIAnnotateControlFlow {
private:
+ Function *F;
UniformityInfo *UA;
Type *Boolean;
@@ -50,18 +51,18 @@ class SIAnnotateControlFlow {
UndefValue *BoolUndef;
Constant *IntMaskZero;
- Function *If;
- Function *Else;
- Function *IfBreak;
- Function *Loop;
- Function *EndCf;
+ Function *If = nullptr;
+ Function *Else = nullptr;
+ Function *IfBreak = nullptr;
+ Function *Loop = nullptr;
+ Function *EndCf = nullptr;
DominatorTree *DT;
StackVector Stack;
LoopInfo *LI;
- void initialize(Module &M, const GCNSubtarget &ST);
+ void initialize(const GCNSubtarget &ST);
bool isUniform(BranchInst *T);
@@ -89,21 +90,30 @@ class SIAnnotateControlFlow {
bool closeControlFlow(BasicBlock *BB);
+ Function *getDecl(Function *&Cache, Intrinsic::ID ID, unsigned NumTypes) {
+ if (!Cache) {
+ Module *M = F->getParent();
+ SmallVector<Type *, 2> Types(NumTypes, IntMask);
+ Cache = Intrinsic::getOrInsertDeclaration(M, ID, Types);
+ }
+ return Cache;
+ }
+
public:
- SIAnnotateControlFlow(Module &M, const GCNSubtarget &ST, DominatorTree &DT,
+ SIAnnotateControlFlow(Function &F, const GCNSubtarget &ST, DominatorTree &DT,
LoopInfo &LI, UniformityInfo &UA)
- : UA(&UA), DT(&DT), LI(&LI) {
- initialize(M, ST);
+ : F(&F), UA(&UA), DT(&DT), LI(&LI) {
+ initialize(ST);
}
- bool run(Function &F);
+ bool run();
};
} // end anonymous namespace
/// Initialize all the types and constants used in the pass
-void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
- LLVMContext &Context = M.getContext();
+void SIAnnotateControlFlow::initialize(const GCNSubtarget &ST) {
+ LLVMContext &Context = F->getContext();
Void = Type::getVoidTy(Context);
Boolean = Type::getInt1Ty(Context);
@@ -115,16 +125,6 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
BoolFalse = ConstantInt::getFalse(Context);
BoolUndef = PoisonValue::get(Boolean);
IntMaskZero = ConstantInt::get(IntMask, 0);
-
- If = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_if, {IntMask});
- Else = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_else,
- {IntMask, IntMask});
- IfBreak = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_if_break,
- {IntMask});
- Loop =
- Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_loop, {IntMask});
- EndCf = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_end_cf,
- {IntMask});
}
/// Is the branch condition uniform or did the StructurizeCFG pass
@@ -190,7 +190,8 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
return false;
IRBuilder<> IRB(Term);
- Value *IfCall = IRB.CreateCall(If, {Term->getCondition()});
+ Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, 1),
+ {Term->getCondition()});
Value *Cond = IRB.CreateExtractValue(IfCall, {0});
Value *Mask = IRB.CreateExtractValue(IfCall, {1});
Term->setCondition(Cond);
@@ -205,7 +206,8 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
IRBuilder<> IRB(Term);
- Value *ElseCall = IRB.CreateCall(Else, {popSaved()});
+ Value *ElseCall =
+ IRB.CreateCall(getDecl(Else, Intrinsic::amdgcn_else, 2), {popSaved()});
Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
Term->setCondition(Cond);
@@ -218,7 +220,8 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
- return IRBuilder<>(I).CreateCall(IfBreak, {Cond, Broken});
+ return IRBuilder<>(I).CreateCall(
+ getDecl(IfBreak, Intrinsic::amdgcn_if_break, 1), {Cond, Broken});
};
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
@@ -279,7 +282,8 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
Broken->addIncoming(PHIValue, Pred);
}
- CallInst *LoopCall = IRBuilder<>(Term).CreateCall(Loop, {Arg});
+ CallInst *LoopCall = IRBuilder<>(Term).CreateCall(
+ getDecl(Loop, Intrinsic::amdgcn_loop, 1), {Arg});
Term->setCondition(LoopCall);
push(Term->getSuccessor(0), Arg);
@@ -324,7 +328,7 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
// condition, for now just avoid copying these DebugLocs so that stepping
// out of the then/else block in a debugger doesn't step to the condition.
IRB.SetCurrentDebugLocation(DebugLoc());
- IRB.CreateCall(EndCf, {Exec});
+ IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, 1), {Exec});
}
return true;
@@ -332,11 +336,12 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
/// Annotate the control flow with intrinsics so the backend can
/// recognize if/then/else and loops.
-bool SIAnnotateControlFlow::run(Function &F) {
+bool SIAnnotateControlFlow::run() {
bool Changed = false;
- for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
- E = df_end(&F.getEntryBlock()); I != E; ++I) {
+ for (df_iterator<BasicBlock *> I = df_begin(&F->getEntryBlock()),
+ E = df_end(&F->getEntryBlock());
+ I != E; ++I) {
BasicBlock *BB = *I;
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
@@ -386,10 +391,9 @@ PreservedAnalyses SIAnnotateControlFlowPass::run(Function &F,
UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
- SIAnnotateControlFlow Impl(*F.getParent(), ST, DT, LI, UI);
+ SIAnnotateControlFlow Impl(F, ST, DT, LI, UI);
- // FIXME: We introduce dead declarations of intrinsics even if never used.
- bool Changed = Impl.run(F);
+ bool Changed = Impl.run();
if (!Changed)
return PreservedAnalyses::none();
@@ -426,8 +430,8 @@ class SIAnnotateControlFlowLegacy : public FunctionPass {
const TargetMachine &TM = TPC.getTM<TargetMachine>();
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- SIAnnotateControlFlow Impl(*F.getParent(), ST, DT, LI, UI);
- return Impl.run(F);
+ SIAnnotateControlFlow Impl(F, ST, DT, LI, UI);
+ return Impl.run();
}
};
>From 75f0f8b0eedbfe449da90c794135005620112ab6 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 28 Nov 2024 12:51:37 +0000
Subject: [PATCH 2/2] Simplify getDecl
---
.../Target/AMDGPU/SIAnnotateControlFlow.cpp | 21 ++++++++-----------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 0e0facea22b37a..09ce44897504c3 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -90,12 +90,9 @@ class SIAnnotateControlFlow {
bool closeControlFlow(BasicBlock *BB);
- Function *getDecl(Function *&Cache, Intrinsic::ID ID, unsigned NumTypes) {
- if (!Cache) {
- Module *M = F->getParent();
- SmallVector<Type *, 2> Types(NumTypes, IntMask);
- Cache = Intrinsic::getOrInsertDeclaration(M, ID, Types);
- }
+ Function *getDecl(Function *&Cache, Intrinsic::ID ID, ArrayRef<Type *> Tys) {
+ if (!Cache)
+ Cache = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
return Cache;
}
@@ -190,7 +187,7 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
return false;
IRBuilder<> IRB(Term);
- Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, 1),
+ Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, IntMask),
{Term->getCondition()});
Value *Cond = IRB.CreateExtractValue(IfCall, {0});
Value *Mask = IRB.CreateExtractValue(IfCall, {1});
@@ -206,8 +203,8 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
IRBuilder<> IRB(Term);
- Value *ElseCall =
- IRB.CreateCall(getDecl(Else, Intrinsic::amdgcn_else, 2), {popSaved()});
+ Value *ElseCall = IRB.CreateCall(
+ getDecl(Else, Intrinsic::amdgcn_else, {IntMask, IntMask}), {popSaved()});
Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
Term->setCondition(Cond);
@@ -221,7 +218,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
return IRBuilder<>(I).CreateCall(
- getDecl(IfBreak, Intrinsic::amdgcn_if_break, 1), {Cond, Broken});
+ getDecl(IfBreak, Intrinsic::amdgcn_if_break, IntMask), {Cond, Broken});
};
if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
@@ -283,7 +280,7 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
}
CallInst *LoopCall = IRBuilder<>(Term).CreateCall(
- getDecl(Loop, Intrinsic::amdgcn_loop, 1), {Arg});
+ getDecl(Loop, Intrinsic::amdgcn_loop, IntMask), {Arg});
Term->setCondition(LoopCall);
push(Term->getSuccessor(0), Arg);
@@ -328,7 +325,7 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
// condition, for now just avoid copying these DebugLocs so that stepping
// out of the then/else block in a debugger doesn't step to the condition.
IRB.SetCurrentDebugLocation(DebugLoc());
- IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, 1), {Exec});
+ IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, IntMask), {Exec});
}
return true;
More information about the llvm-commits
mailing list