[llvm] [IR] Add CallBr intrinsics support (PR #133907)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 1 21:54:39 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Robert Imschweiler (ro-i)
<details>
<summary>Changes</summary>
This commit adds support for using intrinsics with callbr. The uses of this will most of the time look like this example:
```llvm
callbr void @<!-- -->llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
unreachable
cont:
...
```
@<!-- -->arsenm
---
Full diff: https://github.com/llvm/llvm-project/pull/133907.diff
8 Files Affected:
- (modified) llvm/include/llvm/Analysis/RegionInfoImpl.h (+12)
- (modified) llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h (+3-3)
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+58-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+35-11)
- (modified) llvm/lib/IR/Verifier.cpp (+24-5)
- (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+5-6)
- (modified) llvm/lib/Transforms/Utils/BasicBlockUtils.cpp (+2-2)
- (added) llvm/test/CodeGen/AMDGPU/callbr.ll (+70)
``````````diff
diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index eb99d8bc6fb23..759e9c47bebb8 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -553,6 +553,18 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
using DST = typename DomFrontierT::DomSetType;
+ // TODO? post domination frontier?
+ if constexpr (std::is_same_v<BlockT, BasicBlock>) {
+ if (DomTreeNodeT *PDTNode = PDT->getNode(exit); PDTNode) {
+ for (DomTreeNodeT *PredNode : *PDTNode) {
+ for (BasicBlock *Pred : predecessors(PredNode->getBlock())) {
+ if (isa<CallBrInst>(Pred->getTerminator()))
+ return false;
+ }
+ }
+ }
+ }
+
DST *entrySuccs = &DF->find(entry)->second;
// Exit is the header of a loop that contains the entry. In this case,
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 6faff3d1fd8e3..59143d235eb93 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -606,9 +606,9 @@ bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
// successors
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
-// Check whether the function only has simple terminator:
-// br/brcond/unreachable/ret
-bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators:
+// br/brcond/unreachable/ret (or callbr if AllowCallBr)
+bool hasOnlySimpleTerminator(const Function &F, bool AllowCallBr = true);
// Returns true if these basic blocks belong to a presplit coroutine and the
// edge corresponds to the 'default' case in the switch statement in the
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f8afb42bf5535..0f698375ad6cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3009,8 +3009,64 @@ bool IRTranslator::translateInvoke(const User &U,
bool IRTranslator::translateCallBr(const User &U,
MachineIRBuilder &MIRBuilder) {
- // FIXME: Implement this.
- return false;
+ const CallBrInst &I = cast<CallBrInst>(U);
+ MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
+
+ // TODO: operand bundles (see SelDAG implementation of callbr)?
+ assert(!I.hasOperandBundles() &&
+ "Cannot lower callbrs with operand bundles yet");
+
+ if (I.isInlineAsm()) {
+ // FIXME: inline asm not yet supported
+ if (!translateInlineAsm(I, MIRBuilder))
+ return false;
+ } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (I.getIntrinsicID()) {
+ default:
+ report_fatal_error("Unsupported intrinsic for callbr");
+ case Intrinsic::amdgcn_kill:
+ if (I.getNumIndirectDests() != 1)
+ report_fatal_error(
+ "amdgcn.kill supportes exactly one indirect destination");
+ CallInst *CI =
+ CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+ SmallVector<Value *, 1>(I.args()));
+ bool Success = translateCall(*CI, MIRBuilder);
+ CI->deleteValue();
+ if (!Success)
+ return false;
+ break;
+ }
+ } else {
+ report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+ }
+
+ // Retrieve successors.
+ SmallPtrSet<BasicBlock *, 8> Dests;
+ Dests.insert(I.getDefaultDest());
+ MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+ // TODO: For most of the cases where there is an intrinsic callbr, we're
+ // having exactly one indirect target, which will be unreachable. As soon as
+ // this changes, we might need to enhance
+ // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+ // intrinsic indirect branches.
+ if (I.isInlineAsm()) {
+ for (BasicBlock *Dest : I.getIndirectDests()) {
+ MachineBasicBlock *Target = &getMBB(*Dest);
+ Target->setIsInlineAsmBrIndirectTarget();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
+ }
+ CallBrMBB->normalizeSuccProbs();
+
+ return true;
}
bool IRTranslator::translateLandingPad(const User &U,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6db2a5ffbfb84..c9501128cd593 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3385,8 +3385,26 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
- assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
- visitInlineAsm(I);
+ if (I.isInlineAsm()) {
+ visitInlineAsm(I);
+ } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (I.getIntrinsicID()) {
+ default:
+ report_fatal_error("Unsupported intrinsic for callbr");
+ case Intrinsic::amdgcn_kill:
+ if (I.getNumIndirectDests() != 1)
+ report_fatal_error(
+ "amdgcn.kill supportes exactly one indirect destination");
+ CallInst *CI =
+ CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+ SmallVector<Value *, 1>(I.args()));
+ visitCall(*CI);
+ CI->deleteValue();
+ break;
+ }
+ } else {
+ report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+ }
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
@@ -3396,15 +3414,21 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
- MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
- Target->setIsInlineAsmBrIndirectTarget();
- Target->setMachineBlockAddressTaken();
- Target->setLabelMustBeEmitted();
- // Don't add duplicate machine successors.
- if (Dests.insert(Dest).second)
- addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ // TODO: For most of the cases where there is an intrinsic callbr, we're
+ // having exactly one indirect target, which will be unreachable. As soon as
+ // this changes, we might need to enhance
+ // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+ // intrinsic indirect branches.
+ if (I.isInlineAsm()) {
+ for (BasicBlock *Dest : I.getIndirectDests()) {
+ MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+ Target->setIsInlineAsmBrIndirectTarget();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
}
CallBrMBB->normalizeSuccProbs();
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ed86a10c3a25f..fbf6e087177c6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3249,11 +3249,30 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
}
void Verifier::visitCallBrInst(CallBrInst &CBI) {
- Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
- const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
- Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+ if (!CBI.isInlineAsm()) {
+ switch (CBI.getIntrinsicID()) {
+ case Intrinsic::amdgcn_kill: {
+ Check(CBI.getNumIndirectDests() == 1,
+ "Callbr amdgcn_kill only supports one indirect dest");
+ bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
+ CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
+ Check(Unreachable || (Call && Call->getIntrinsicID() ==
+ Intrinsic::amdgcn_unreachable),
+ "Callbr amdgcn_kill indirect dest needs to be unreachable");
+ visitIntrinsicCall(Intrinsic::amdgcn_kill, CBI);
+ break;
+ }
+ default:
+ CheckFailed(
+ "Callbr currently only supports asm-goto and selected intrinsics");
+ }
+ visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
+ } else {
+ const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
+ Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
- verifyInlineAsmCall(CBI);
+ verifyInlineAsmCall(CBI);
+ }
visitTerminator(CBI);
}
@@ -5211,7 +5230,7 @@ void Verifier::visitInstruction(Instruction &I) {
(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
IsAttachedCallOperand(F, CBI, i)),
"Cannot take the address of an intrinsic!", &I);
- Check(!F->isIntrinsic() || isa<CallInst>(I) ||
+ Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::seh_try_begin ||
F->getIntrinsicID() == Intrinsic::seh_try_end ||
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d1054b9b045ca..bdd8b5fbb3212 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -486,11 +486,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
- BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
- for (BasicBlock *Succ : Term->successors())
- if (Visited.count(Succ))
- Loops[Succ] = BB;
+ if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); Term)
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
}
}
@@ -522,7 +521,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
- if (!ParentRegion->contains(P))
+ if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
continue;
Region *R = RI->getRegionFor(P);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ce5bf0c7207c7..3090f65fac627 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1907,11 +1907,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->swapSuccessors();
}
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+bool llvm::hasOnlySimpleTerminator(const Function &F, bool AllowCallBr) {
for (auto &BB : F) {
auto *Term = BB.getTerminator();
if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
- isa<BranchInst>(Term)))
+ isa<BranchInst>(Term) || (AllowCallBr && isa<CallBrInst>(Term))))
return false;
}
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
new file mode 100644
index 0000000000000..e2e84dca96cbf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -0,0 +1,70 @@
+; RUN: rm -rf %t && split-file %s %t
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-seldag.s < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=SELDAG %s < %t/with-callbr-seldag.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-gisel.s -global-isel < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=GISEL %s < %t/with-callbr-gisel.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-seldag.s < %t/without-callbr.ll
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-gisel.s -global-isel < %t/without-callbr.ll
+; RUN: diff %t/with-callbr-seldag.s %t/without-callbr-seldag.s
+; RUN: diff %t/with-callbr-gisel.s %t/without-callbr-gisel.s
+
+;--- with-callbr.ll
+
+; SELDAG-LABEL: test_kill:
+; SELDAG-NEXT: ; %bb.0:
+; SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_load_dword v0, v[0:1]
+; SELDAG-NEXT: v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT: s_mov_b64 s[4:5], exec
+; SELDAG-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT: s_cbranch_scc0 .LBB0_2
+; SELDAG-NEXT: ; %bb.1:
+; SELDAG-NEXT: s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_store_dword v[2:3], v0
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: s_setpc_b64 s[30:31]
+; SELDAG-NEXT: .LBB0_2:
+; SELDAG-NEXT: s_mov_b64 exec, 0
+; SELDAG-NEXT: s_endpgm
+
+; GISEL-LABEL: test_kill:
+; GISEL-NEXT: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_store_dword v[2:3], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB0_2:
+; GISEL-NEXT: s_mov_b64 exec, 0
+; GISEL-NEXT: s_endpgm
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+ %a = load i32, ptr %src, align 4
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+ unreachable
+cont:
+ store i32 %a, ptr %dst, align 4
+ ret void
+}
+
+;--- without-callbr.ll
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+ %a = load i32, ptr %src, align 4
+ call void @llvm.amdgcn.kill(i1 %c)
+ store i32 %a, ptr %dst, align 4
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/133907
More information about the llvm-commits
mailing list