[llvm] [AMDGPU] Only insert intrinsic declarations when needed (PR #117998)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 28 04:52:12 PST 2024


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/117998

>From 5a875e65796767daad872a4e69c53edaceacf237 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 28 Nov 2024 11:59:15 +0000
Subject: [PATCH 1/2] [AMDGPU] Only insert intrinsic declarations when needed

---
 .../Target/AMDGPU/SIAnnotateControlFlow.cpp   | 74 ++++++++++---------
 1 file changed, 39 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index ea653490f1bf37..0e0facea22b37a 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -38,6 +38,7 @@ using StackVector = SmallVector<StackEntry, 16>;
 
 class SIAnnotateControlFlow {
 private:
+  Function *F;
   UniformityInfo *UA;
 
   Type *Boolean;
@@ -50,18 +51,18 @@ class SIAnnotateControlFlow {
   UndefValue *BoolUndef;
   Constant *IntMaskZero;
 
-  Function *If;
-  Function *Else;
-  Function *IfBreak;
-  Function *Loop;
-  Function *EndCf;
+  Function *If = nullptr;
+  Function *Else = nullptr;
+  Function *IfBreak = nullptr;
+  Function *Loop = nullptr;
+  Function *EndCf = nullptr;
 
   DominatorTree *DT;
   StackVector Stack;
 
   LoopInfo *LI;
 
-  void initialize(Module &M, const GCNSubtarget &ST);
+  void initialize(const GCNSubtarget &ST);
 
   bool isUniform(BranchInst *T);
 
@@ -89,21 +90,30 @@ class SIAnnotateControlFlow {
 
   bool closeControlFlow(BasicBlock *BB);
 
+  Function *getDecl(Function *&Cache, Intrinsic::ID ID, unsigned NumTypes) {
+    if (!Cache) {
+      Module *M = F->getParent();
+      SmallVector<Type *, 2> Types(NumTypes, IntMask);
+      Cache = Intrinsic::getOrInsertDeclaration(M, ID, Types);
+    }
+    return Cache;
+  }
+
 public:
-  SIAnnotateControlFlow(Module &M, const GCNSubtarget &ST, DominatorTree &DT,
+  SIAnnotateControlFlow(Function &F, const GCNSubtarget &ST, DominatorTree &DT,
                         LoopInfo &LI, UniformityInfo &UA)
-      : UA(&UA), DT(&DT), LI(&LI) {
-    initialize(M, ST);
+      : F(&F), UA(&UA), DT(&DT), LI(&LI) {
+    initialize(ST);
   }
 
-  bool run(Function &F);
+  bool run();
 };
 
 } // end anonymous namespace
 
 /// Initialize all the types and constants used in the pass
-void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
-  LLVMContext &Context = M.getContext();
+void SIAnnotateControlFlow::initialize(const GCNSubtarget &ST) {
+  LLVMContext &Context = F->getContext();
 
   Void = Type::getVoidTy(Context);
   Boolean = Type::getInt1Ty(Context);
@@ -115,16 +125,6 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
   BoolFalse = ConstantInt::getFalse(Context);
   BoolUndef = PoisonValue::get(Boolean);
   IntMaskZero = ConstantInt::get(IntMask, 0);
-
-  If = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_if, {IntMask});
-  Else = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_else,
-                                           {IntMask, IntMask});
-  IfBreak = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_if_break,
-                                              {IntMask});
-  Loop =
-      Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_loop, {IntMask});
-  EndCf = Intrinsic::getOrInsertDeclaration(&M, Intrinsic::amdgcn_end_cf,
-                                            {IntMask});
 }
 
 /// Is the branch condition uniform or did the StructurizeCFG pass
@@ -190,7 +190,8 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
     return false;
 
   IRBuilder<> IRB(Term);
-  Value *IfCall = IRB.CreateCall(If, {Term->getCondition()});
+  Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, 1),
+                                 {Term->getCondition()});
   Value *Cond = IRB.CreateExtractValue(IfCall, {0});
   Value *Mask = IRB.CreateExtractValue(IfCall, {1});
   Term->setCondition(Cond);
@@ -205,7 +206,8 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
   }
 
   IRBuilder<> IRB(Term);
-  Value *ElseCall = IRB.CreateCall(Else, {popSaved()});
+  Value *ElseCall =
+      IRB.CreateCall(getDecl(Else, Intrinsic::amdgcn_else, 2), {popSaved()});
   Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
   Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
   Term->setCondition(Cond);
@@ -218,7 +220,8 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
     Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
 
   auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
-    return IRBuilder<>(I).CreateCall(IfBreak, {Cond, Broken});
+    return IRBuilder<>(I).CreateCall(
+        getDecl(IfBreak, Intrinsic::amdgcn_if_break, 1), {Cond, Broken});
   };
 
   if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
@@ -279,7 +282,8 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
     Broken->addIncoming(PHIValue, Pred);
   }
 
-  CallInst *LoopCall = IRBuilder<>(Term).CreateCall(Loop, {Arg});
+  CallInst *LoopCall = IRBuilder<>(Term).CreateCall(
+      getDecl(Loop, Intrinsic::amdgcn_loop, 1), {Arg});
   Term->setCondition(LoopCall);
 
   push(Term->getSuccessor(0), Arg);
@@ -324,7 +328,7 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
     // condition, for now just avoid copying these DebugLocs so that stepping
     // out of the then/else block in a debugger doesn't step to the condition.
     IRB.SetCurrentDebugLocation(DebugLoc());
-    IRB.CreateCall(EndCf, {Exec});
+    IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, 1), {Exec});
   }
 
   return true;
@@ -332,11 +336,12 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
 
 /// Annotate the control flow with intrinsics so the backend can
 /// recognize if/then/else and loops.
-bool SIAnnotateControlFlow::run(Function &F) {
+bool SIAnnotateControlFlow::run() {
   bool Changed = false;
 
-  for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
-       E = df_end(&F.getEntryBlock()); I != E; ++I) {
+  for (df_iterator<BasicBlock *> I = df_begin(&F->getEntryBlock()),
+                                 E = df_end(&F->getEntryBlock());
+       I != E; ++I) {
     BasicBlock *BB = *I;
     BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
 
@@ -386,10 +391,9 @@ PreservedAnalyses SIAnnotateControlFlowPass::run(Function &F,
   UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);
   LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
 
-  SIAnnotateControlFlow Impl(*F.getParent(), ST, DT, LI, UI);
+  SIAnnotateControlFlow Impl(F, ST, DT, LI, UI);
 
-  // FIXME: We introduce dead declarations of intrinsics even if never used.
-  bool Changed = Impl.run(F);
+  bool Changed = Impl.run();
   if (!Changed)
     return PreservedAnalyses::none();
 
@@ -426,8 +430,8 @@ class SIAnnotateControlFlowLegacy : public FunctionPass {
     const TargetMachine &TM = TPC.getTM<TargetMachine>();
     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
 
-    SIAnnotateControlFlow Impl(*F.getParent(), ST, DT, LI, UI);
-    return Impl.run(F);
+    SIAnnotateControlFlow Impl(F, ST, DT, LI, UI);
+    return Impl.run();
   }
 };
 

>From 75f0f8b0eedbfe449da90c794135005620112ab6 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 28 Nov 2024 12:51:37 +0000
Subject: [PATCH 2/2] Simplify getDecl

---
 .../Target/AMDGPU/SIAnnotateControlFlow.cpp   | 21 ++++++++-----------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 0e0facea22b37a..09ce44897504c3 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -90,12 +90,9 @@ class SIAnnotateControlFlow {
 
   bool closeControlFlow(BasicBlock *BB);
 
-  Function *getDecl(Function *&Cache, Intrinsic::ID ID, unsigned NumTypes) {
-    if (!Cache) {
-      Module *M = F->getParent();
-      SmallVector<Type *, 2> Types(NumTypes, IntMask);
-      Cache = Intrinsic::getOrInsertDeclaration(M, ID, Types);
-    }
+  Function *getDecl(Function *&Cache, Intrinsic::ID ID, ArrayRef<Type *> Tys) {
+    if (!Cache)
+      Cache = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
     return Cache;
   }
 
@@ -190,7 +187,7 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
     return false;
 
   IRBuilder<> IRB(Term);
-  Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, 1),
+  Value *IfCall = IRB.CreateCall(getDecl(If, Intrinsic::amdgcn_if, IntMask),
                                  {Term->getCondition()});
   Value *Cond = IRB.CreateExtractValue(IfCall, {0});
   Value *Mask = IRB.CreateExtractValue(IfCall, {1});
@@ -206,8 +203,8 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
   }
 
   IRBuilder<> IRB(Term);
-  Value *ElseCall =
-      IRB.CreateCall(getDecl(Else, Intrinsic::amdgcn_else, 2), {popSaved()});
+  Value *ElseCall = IRB.CreateCall(
+      getDecl(Else, Intrinsic::amdgcn_else, {IntMask, IntMask}), {popSaved()});
   Value *Cond = IRB.CreateExtractValue(ElseCall, {0});
   Value *Mask = IRB.CreateExtractValue(ElseCall, {1});
   Term->setCondition(Cond);
@@ -221,7 +218,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
 
   auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
     return IRBuilder<>(I).CreateCall(
-        getDecl(IfBreak, Intrinsic::amdgcn_if_break, 1), {Cond, Broken});
+        getDecl(IfBreak, Intrinsic::amdgcn_if_break, IntMask), {Cond, Broken});
   };
 
   if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
@@ -283,7 +280,7 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
   }
 
   CallInst *LoopCall = IRBuilder<>(Term).CreateCall(
-      getDecl(Loop, Intrinsic::amdgcn_loop, 1), {Arg});
+      getDecl(Loop, Intrinsic::amdgcn_loop, IntMask), {Arg});
   Term->setCondition(LoopCall);
 
   push(Term->getSuccessor(0), Arg);
@@ -328,7 +325,7 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
     // condition, for now just avoid copying these DebugLocs so that stepping
     // out of the then/else block in a debugger doesn't step to the condition.
     IRB.SetCurrentDebugLocation(DebugLoc());
-    IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, 1), {Exec});
+    IRB.CreateCall(getDecl(EndCf, Intrinsic::amdgcn_end_cf, IntMask), {Exec});
   }
 
   return true;



More information about the llvm-commits mailing list