[llvm] [IR] Add CallBr intrinsics support (PR #133907)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 1 21:54:39 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: Robert Imschweiler (ro-i)

<details>
<summary>Changes</summary>

This commit adds support for using intrinsics with callbr. In most cases, the usage will look like this example:
```llvm
  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
  unreachable
cont:
  ...
```

@arsenm

---
Full diff: https://github.com/llvm/llvm-project/pull/133907.diff


8 Files Affected:

- (modified) llvm/include/llvm/Analysis/RegionInfoImpl.h (+12) 
- (modified) llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h (+3-3) 
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+58-2) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+35-11) 
- (modified) llvm/lib/IR/Verifier.cpp (+24-5) 
- (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+5-6) 
- (modified) llvm/lib/Transforms/Utils/BasicBlockUtils.cpp (+2-2) 
- (added) llvm/test/CodeGen/AMDGPU/callbr.ll (+70) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index eb99d8bc6fb23..759e9c47bebb8 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -553,6 +553,18 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
 
   using DST = typename DomFrontierT::DomSetType;
 
+  // TODO? post domination frontier?
+  if constexpr (std::is_same_v<BlockT, BasicBlock>) {
+    if (DomTreeNodeT *PDTNode = PDT->getNode(exit); PDTNode) {
+      for (DomTreeNodeT *PredNode : *PDTNode) {
+        for (BasicBlock *Pred : predecessors(PredNode->getBlock())) {
+          if (isa<CallBrInst>(Pred->getTerminator()))
+            return false;
+        }
+      }
+    }
+  }
+
   DST *entrySuccs = &DF->find(entry)->second;
 
   // Exit is the header of a loop that contains the entry. In this case,
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 6faff3d1fd8e3..59143d235eb93 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -606,9 +606,9 @@ bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
 // successors
 void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
 
-// Check whether the function only has simple terminator:
-// br/brcond/unreachable/ret
-bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators:
+// br/brcond/unreachable/ret (or callbr if AllowCallBr)
+bool hasOnlySimpleTerminator(const Function &F, bool AllowCallBr = true);
 
 // Returns true if these basic blocks belong to a presplit coroutine and the
 // edge corresponds to the 'default' case in the switch statement in the
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f8afb42bf5535..0f698375ad6cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3009,8 +3009,64 @@ bool IRTranslator::translateInvoke(const User &U,
 
 bool IRTranslator::translateCallBr(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  // FIXME: Implement this.
-  return false;
+  const CallBrInst &I = cast<CallBrInst>(U);
+  MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
+
+  // TODO: operand bundles (see SelDAG implementation of callbr)?
+  assert(!I.hasOperandBundles() &&
+         "Cannot lower callbrs with operand bundles yet");
+
+  if (I.isInlineAsm()) {
+    // FIXME: inline asm not yet supported
+    if (!translateInlineAsm(I, MIRBuilder))
+      return false;
+  } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+    switch (I.getIntrinsicID()) {
+    default:
+      report_fatal_error("Unsupported intrinsic for callbr");
+    case Intrinsic::amdgcn_kill:
+      if (I.getNumIndirectDests() != 1)
+        report_fatal_error(
+            "amdgcn.kill supportes exactly one indirect destination");
+      CallInst *CI =
+          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+                           SmallVector<Value *, 1>(I.args()));
+      bool Success = translateCall(*CI, MIRBuilder);
+      CI->deleteValue();
+      if (!Success)
+        return false;
+      break;
+    }
+  } else {
+    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+  }
+
+  // Retrieve successors.
+  SmallPtrSet<BasicBlock *, 8> Dests;
+  Dests.insert(I.getDefaultDest());
+  MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
+
+  // Update successor info.
+  addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+  // TODO: For most of the cases where there is an intrinsic callbr, we're
+  // having exactly one indirect target, which will be unreachable. As soon as
+  // this changes, we might need to enhance
+  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+  // intrinsic indirect branches.
+  if (I.isInlineAsm()) {
+    for (BasicBlock *Dest : I.getIndirectDests()) {
+      MachineBasicBlock *Target = &getMBB(*Dest);
+      Target->setIsInlineAsmBrIndirectTarget();
+      Target->setMachineBlockAddressTaken();
+      Target->setLabelMustBeEmitted();
+      // Don't add duplicate machine successors.
+      if (Dests.insert(Dest).second)
+        addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+    }
+  }
+  CallBrMBB->normalizeSuccProbs();
+
+  return true;
 }
 
 bool IRTranslator::translateLandingPad(const User &U,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6db2a5ffbfb84..c9501128cd593 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3385,8 +3385,26 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
              {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
          "Cannot lower callbrs with arbitrary operand bundles yet!");
 
-  assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
-  visitInlineAsm(I);
+  if (I.isInlineAsm()) {
+    visitInlineAsm(I);
+  } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+    switch (I.getIntrinsicID()) {
+    default:
+      report_fatal_error("Unsupported intrinsic for callbr");
+    case Intrinsic::amdgcn_kill:
+      if (I.getNumIndirectDests() != 1)
+        report_fatal_error(
+            "amdgcn.kill supportes exactly one indirect destination");
+      CallInst *CI =
+          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+                           SmallVector<Value *, 1>(I.args()));
+      visitCall(*CI);
+      CI->deleteValue();
+      break;
+    }
+  } else {
+    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+  }
   CopyToExportRegsIfNeeded(&I);
 
   // Retrieve successors.
@@ -3396,15 +3414,21 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
 
   // Update successor info.
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
-  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
-    BasicBlock *Dest = I.getIndirectDest(i);
-    MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
-    Target->setIsInlineAsmBrIndirectTarget();
-    Target->setMachineBlockAddressTaken();
-    Target->setLabelMustBeEmitted();
-    // Don't add duplicate machine successors.
-    if (Dests.insert(Dest).second)
-      addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+  // TODO: For most of the cases where there is an intrinsic callbr, we're
+  // having exactly one indirect target, which will be unreachable. As soon as
+  // this changes, we might need to enhance
+  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+  // intrinsic indirect branches.
+  if (I.isInlineAsm()) {
+    for (BasicBlock *Dest : I.getIndirectDests()) {
+      MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+      Target->setIsInlineAsmBrIndirectTarget();
+      Target->setMachineBlockAddressTaken();
+      Target->setLabelMustBeEmitted();
+      // Don't add duplicate machine successors.
+      if (Dests.insert(Dest).second)
+        addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+    }
   }
   CallBrMBB->normalizeSuccProbs();
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ed86a10c3a25f..fbf6e087177c6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3249,11 +3249,30 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
 }
 
 void Verifier::visitCallBrInst(CallBrInst &CBI) {
-  Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
-  const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
-  Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+  if (!CBI.isInlineAsm()) {
+    switch (CBI.getIntrinsicID()) {
+    case Intrinsic::amdgcn_kill: {
+      Check(CBI.getNumIndirectDests() == 1,
+            "Callbr amdgcn_kill only supports one indirect dest");
+      bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
+      CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
+      Check(Unreachable || (Call && Call->getIntrinsicID() ==
+                                        Intrinsic::amdgcn_unreachable),
+            "Callbr amdgcn_kill indirect dest needs to be unreachable");
+      visitIntrinsicCall(Intrinsic::amdgcn_kill, CBI);
+      break;
+    }
+    default:
+      CheckFailed(
+          "Callbr currently only supports asm-goto and selected intrinsics");
+    }
+    visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
+  } else {
+    const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
+    Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
 
-  verifyInlineAsmCall(CBI);
+    verifyInlineAsmCall(CBI);
+  }
   visitTerminator(CBI);
 }
 
@@ -5211,7 +5230,7 @@ void Verifier::visitInstruction(Instruction &I) {
              (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
              IsAttachedCallOperand(F, CBI, i)),
             "Cannot take the address of an intrinsic!", &I);
-      Check(!F->isIntrinsic() || isa<CallInst>(I) ||
+      Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
                 F->getIntrinsicID() == Intrinsic::donothing ||
                 F->getIntrinsicID() == Intrinsic::seh_try_begin ||
                 F->getIntrinsicID() == Intrinsic::seh_try_end ||
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d1054b9b045ca..bdd8b5fbb3212 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -486,11 +486,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
   } else {
     // Test for successors as back edge
     BasicBlock *BB = N->getNodeAs<BasicBlock>();
-    BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
-    for (BasicBlock *Succ : Term->successors())
-      if (Visited.count(Succ))
-        Loops[Succ] = BB;
+    if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); Term)
+      for (BasicBlock *Succ : Term->successors())
+        if (Visited.count(Succ))
+          Loops[Succ] = BB;
   }
 }
 
@@ -522,7 +521,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
   for (BasicBlock *P : predecessors(BB)) {
     // Ignore it if it's a branch from outside into our region entry
-    if (!ParentRegion->contains(P))
+    if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
       continue;
 
     Region *R = RI->getRegionFor(P);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ce5bf0c7207c7..3090f65fac627 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1907,11 +1907,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
   PBI->swapSuccessors();
 }
 
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+bool llvm::hasOnlySimpleTerminator(const Function &F, bool AllowCallBr) {
   for (auto &BB : F) {
     auto *Term = BB.getTerminator();
     if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
-          isa<BranchInst>(Term)))
+          isa<BranchInst>(Term) || (AllowCallBr && isa<CallBrInst>(Term))))
       return false;
   }
   return true;
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
new file mode 100644
index 0000000000000..e2e84dca96cbf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -0,0 +1,70 @@
+; RUN: rm -rf %t && split-file %s %t
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-seldag.s < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=SELDAG %s < %t/with-callbr-seldag.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-gisel.s -global-isel < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=GISEL %s < %t/with-callbr-gisel.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-seldag.s < %t/without-callbr.ll
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-gisel.s -global-isel < %t/without-callbr.ll
+; RUN: diff %t/with-callbr-seldag.s %t/without-callbr-seldag.s
+; RUN: diff %t/with-callbr-gisel.s %t/without-callbr-gisel.s
+
+;--- with-callbr.ll
+
+; SELDAG-LABEL: test_kill:
+; SELDAG-NEXT:  ; %bb.0:
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      flat_load_dword v0, v[0:1]
+; SELDAG-NEXT:      v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT:      v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT:      s_mov_b64 s[4:5], exec
+; SELDAG-NEXT:      s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT:      s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT:      s_cbranch_scc0 .LBB0_2
+; SELDAG-NEXT:  ; %bb.1:
+; SELDAG-NEXT:      s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      flat_store_dword v[2:3], v0
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      s_setpc_b64 s[30:31]
+; SELDAG-NEXT:  .LBB0_2:
+; SELDAG-NEXT:      s_mov_b64 exec, 0
+; SELDAG-NEXT:      s_endpgm
+
+; GISEL-LABEL: test_kill:
+; GISEL-NEXT:  ; %bb.0:
+; GISEL-NEXT:      s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      flat_load_dword v0, v[0:1]
+; GISEL-NEXT:      v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT:      v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT:      s_mov_b64 s[4:5], exec
+; GISEL-NEXT:      s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT:      s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT:      s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT:  ; %bb.1:
+; GISEL-NEXT:      s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      flat_store_dword v[2:3], v0
+; GISEL-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB0_2:
+; GISEL-NEXT:      s_mov_b64 exec, 0
+; GISEL-NEXT:      s_endpgm
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  unreachable
+cont:
+  store i32 %a, ptr %dst, align 4
+  ret void
+}
+
+;--- without-callbr.ll
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  call void @llvm.amdgcn.kill(i1 %c)
+  store i32 %a, ptr %dst, align 4
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/133907


More information about the llvm-commits mailing list