[llvm] [IR] Add CallBr intrinsics support (PR #133907)

Robert Imschweiler via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 14 01:33:04 PDT 2025


https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/133907

>From 345da0bb18e151f71b570becf49d2e750102f76b Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 1 Apr 2025 08:03:16 -0500
Subject: [PATCH 1/9] [IR] Add CallBr intrinsics support

This commit adds support for using intrinsics with callbr.
In most cases, its use will look like this example:
```llvm
  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
  unreachable
cont:
  ...
```
---
 llvm/include/llvm/Analysis/RegionInfoImpl.h   | 12 ++++
 .../llvm/Transforms/Utils/BasicBlockUtils.h   |  6 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 59 +++++++++++++++-
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 45 +++++++++---
 llvm/lib/IR/Verifier.cpp                      | 29 ++++++--
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 11 ++-
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp |  4 +-
 llvm/test/CodeGen/AMDGPU/callbr.ll            | 70 +++++++++++++++++++
 8 files changed, 207 insertions(+), 29 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/callbr.ll

diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index eb99d8bc6fb23..759e9c47bebb8 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -553,6 +553,18 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
 
   using DST = typename DomFrontierT::DomSetType;
 
+  // TODO? post domination frontier?
+  if constexpr (std::is_same_v<BlockT, BasicBlock>) {
+    if (DomTreeNodeT *PDTNode = PDT->getNode(exit); PDTNode) {
+      for (DomTreeNodeT *PredNode : *PDTNode) {
+        for (BasicBlock *Pred : predecessors(PredNode->getBlock())) {
+          if (isa<CallBrInst>(Pred->getTerminator()))
+            return false;
+        }
+      }
+    }
+  }
+
   DST *entrySuccs = &DF->find(entry)->second;
 
   // Exit is the header of a loop that contains the entry. In this case,
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 6faff3d1fd8e3..59143d235eb93 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -606,9 +606,9 @@ bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
 // successors
 void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
 
-// Check whether the function only has simple terminator:
-// br/brcond/unreachable/ret
-bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators:
+// br/brcond/unreachable/ret (or callbr if AllowCallBr)
+bool hasOnlySimpleTerminator(const Function &F, bool AllowCallBr = true);
 
 // Returns true if these basic blocks belong to a presplit coroutine and the
 // edge corresponds to the 'default' case in the switch statement in the
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f8afb42bf5535..47c825d67c3ea 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3009,8 +3009,63 @@ bool IRTranslator::translateInvoke(const User &U,
 
 bool IRTranslator::translateCallBr(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  // FIXME: Implement this.
-  return false;
+  const CallBrInst &I = cast<CallBrInst>(U);
+  MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
+
+  // TODO: operand bundles (see SelDAG implementation of callbr)?
+  assert(!I.hasOperandBundles() &&
+         "Cannot lower callbrs with operand bundles yet");
+
+  if (I.isInlineAsm()) {
+    // FIXME: inline asm not yet supported
+    if (!translateInlineAsm(I, MIRBuilder))
+      return false;
+  } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+    switch (I.getIntrinsicID()) {
+    default:
+      report_fatal_error("Unsupported intrinsic for callbr");
+    case Intrinsic::amdgcn_kill:
+      if (I.getNumIndirectDests() != 1)
+        report_fatal_error(
+            "amdgcn.kill supportes exactly one indirect destination");
+      CallInst *CI =
+          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+                           SmallVector<Value *, 1>(I.args()));
+      bool Success = translateCall(*CI, MIRBuilder);
+      CI->deleteValue();
+      if (!Success)
+        return false;
+      break;
+    }
+  } else {
+    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+  }
+
+  // Retrieve successors.
+  SmallPtrSet<BasicBlock *, 8> Dests;
+  Dests.insert(I.getDefaultDest());
+  MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
+
+  // Update successor info.
+  addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+  // TODO: For most of the cases where there is an intrinsic callbr, we're
+  // having exactly one indirect target, which will be unreachable. As soon as
+  // this changes, we might need to enhance Target->setIsInlineAsmBrIndirectTarget
+  // or add something similar for intrinsic indirect branches.
+  if (I.isInlineAsm()) {
+    for (BasicBlock *Dest : I.getIndirectDests()) {
+      MachineBasicBlock *Target = &getMBB(*Dest);
+      Target->setIsInlineAsmBrIndirectTarget();
+      Target->setMachineBlockAddressTaken();
+      Target->setLabelMustBeEmitted();
+      // Don't add duplicate machine successors.
+      if (Dests.insert(Dest).second)
+        addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+    }
+  }
+  CallBrMBB->normalizeSuccProbs();
+
+  return true;
 }
 
 bool IRTranslator::translateLandingPad(const User &U,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6db2a5ffbfb84..8d4f7c7e337a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3385,8 +3385,26 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
              {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
          "Cannot lower callbrs with arbitrary operand bundles yet!");
 
-  assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
-  visitInlineAsm(I);
+  if (I.isInlineAsm()) {
+    visitInlineAsm(I);
+  } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+    switch (I.getIntrinsicID()) {
+    default:
+      report_fatal_error("Unsupported intrinsic for callbr");
+    case Intrinsic::amdgcn_kill:
+      if (I.getNumIndirectDests() != 1)
+          report_fatal_error(
+              "amdgcn.kill supportes exactly one indirect destination");
+      CallInst *CI =
+          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+                           SmallVector<Value *, 1>(I.args()));
+      visitCall(*CI);
+      CI->deleteValue();
+      break;
+    }
+  } else {
+    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+  }
   CopyToExportRegsIfNeeded(&I);
 
   // Retrieve successors.
@@ -3396,15 +3414,20 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
 
   // Update successor info.
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
-  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
-    BasicBlock *Dest = I.getIndirectDest(i);
-    MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
-    Target->setIsInlineAsmBrIndirectTarget();
-    Target->setMachineBlockAddressTaken();
-    Target->setLabelMustBeEmitted();
-    // Don't add duplicate machine successors.
-    if (Dests.insert(Dest).second)
-      addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+  // TODO: For most of the cases where there is an intrinsic callbr, we're
+  // having exactly one indirect target, which will be unreachable. As soon as
+  // this changes, we might need to enhance Target->setIsInlineAsmBrIndirectTarget
+  // or add something similar for intrinsic indirect branches.
+  if (I.isInlineAsm()) {
+    for (BasicBlock *Dest : I.getIndirectDests()) {
+      MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+      Target->setIsInlineAsmBrIndirectTarget();
+      Target->setMachineBlockAddressTaken();
+      Target->setLabelMustBeEmitted();
+      // Don't add duplicate machine successors.
+      if (Dests.insert(Dest).second)
+        addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+    }
   }
   CallBrMBB->normalizeSuccProbs();
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ed86a10c3a25f..5f2d0fcea4dcd 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3249,11 +3249,30 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
 }
 
 void Verifier::visitCallBrInst(CallBrInst &CBI) {
-  Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
-  const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
-  Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+  if (!CBI.isInlineAsm()) {
+    switch (CBI.getIntrinsicID()) {
+    case Intrinsic::amdgcn_kill: {
+      Check(CBI.getNumIndirectDests() == 1,
+            "Callbr amdgcn_kill only supports one indirect dest");
+      bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
+      CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
+      Check(Unreachable ||
+                (Call && Call->getIntrinsicID() == Intrinsic::amdgcn_unreachable),
+            "Callbr amdgcn_kill indirect dest needs to be unreachable");
+      visitIntrinsicCall(Intrinsic::amdgcn_kill, CBI);
+      break;
+    }
+    default:
+      CheckFailed(
+          "Callbr currently only supports asm-goto and selected intrinsics");
+    }
+    visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
+  } else {
+    const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
+    Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
 
-  verifyInlineAsmCall(CBI);
+    verifyInlineAsmCall(CBI);
+  }
   visitTerminator(CBI);
 }
 
@@ -5211,7 +5230,7 @@ void Verifier::visitInstruction(Instruction &I) {
              (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
              IsAttachedCallOperand(F, CBI, i)),
             "Cannot take the address of an intrinsic!", &I);
-      Check(!F->isIntrinsic() || isa<CallInst>(I) ||
+      Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
                 F->getIntrinsicID() == Intrinsic::donothing ||
                 F->getIntrinsicID() == Intrinsic::seh_try_begin ||
                 F->getIntrinsicID() == Intrinsic::seh_try_end ||
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d1054b9b045ca..bdd8b5fbb3212 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -486,11 +486,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
   } else {
     // Test for successors as back edge
     BasicBlock *BB = N->getNodeAs<BasicBlock>();
-    BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
-    for (BasicBlock *Succ : Term->successors())
-      if (Visited.count(Succ))
-        Loops[Succ] = BB;
+    if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); Term)
+      for (BasicBlock *Succ : Term->successors())
+        if (Visited.count(Succ))
+          Loops[Succ] = BB;
   }
 }
 
@@ -522,7 +521,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
   for (BasicBlock *P : predecessors(BB)) {
     // Ignore it if it's a branch from outside into our region entry
-    if (!ParentRegion->contains(P))
+    if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
       continue;
 
     Region *R = RI->getRegionFor(P);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ce5bf0c7207c7..3090f65fac627 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1907,11 +1907,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
   PBI->swapSuccessors();
 }
 
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+bool llvm::hasOnlySimpleTerminator(const Function &F, bool AllowCallBr) {
   for (auto &BB : F) {
     auto *Term = BB.getTerminator();
     if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
-          isa<BranchInst>(Term)))
+          isa<BranchInst>(Term) || (AllowCallBr && isa<CallBrInst>(Term))))
       return false;
   }
   return true;
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
new file mode 100644
index 0000000000000..e2e84dca96cbf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -0,0 +1,70 @@
+; RUN: rm -rf %t && split-file %s %t
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-seldag.s < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=SELDAG %s < %t/with-callbr-seldag.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-gisel.s -global-isel < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=GISEL %s < %t/with-callbr-gisel.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-seldag.s < %t/without-callbr.ll
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-gisel.s -global-isel < %t/without-callbr.ll
+; RUN: diff %t/with-callbr-seldag.s %t/without-callbr-seldag.s
+; RUN: diff %t/with-callbr-gisel.s %t/without-callbr-gisel.s
+
+;--- with-callbr.ll
+
+; SELDAG-LABEL: test_kill:
+; SELDAG-NEXT:  ; %bb.0:
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      flat_load_dword v0, v[0:1]
+; SELDAG-NEXT:      v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT:      v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT:      s_mov_b64 s[4:5], exec
+; SELDAG-NEXT:      s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT:      s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT:      s_cbranch_scc0 .LBB0_2
+; SELDAG-NEXT:  ; %bb.1:
+; SELDAG-NEXT:      s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      flat_store_dword v[2:3], v0
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      s_setpc_b64 s[30:31]
+; SELDAG-NEXT:  .LBB0_2:
+; SELDAG-NEXT:      s_mov_b64 exec, 0
+; SELDAG-NEXT:      s_endpgm
+
+; GISEL-LABEL: test_kill:
+; GISEL-NEXT:  ; %bb.0:
+; GISEL-NEXT:      s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      flat_load_dword v0, v[0:1]
+; GISEL-NEXT:      v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT:      v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT:      s_mov_b64 s[4:5], exec
+; GISEL-NEXT:      s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT:      s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT:      s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT:  ; %bb.1:
+; GISEL-NEXT:      s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      flat_store_dword v[2:3], v0
+; GISEL-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB0_2:
+; GISEL-NEXT:      s_mov_b64 exec, 0
+; GISEL-NEXT:      s_endpgm
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  unreachable
+cont:
+  store i32 %a, ptr %dst, align 4
+  ret void
+}
+
+;--- without-callbr.ll
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  call void @llvm.amdgcn.kill(i1 %c)
+  store i32 %a, ptr %dst, align 4
+  ret void
+}

>From 4305f2cf33e4aeaba2008641ef68d4c38f60d00b Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 1 Apr 2025 08:07:44 -0500
Subject: [PATCH 2/9] fix formatting

---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp          | 5 +++--
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 9 +++++----
 llvm/lib/IR/Verifier.cpp                              | 4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 47c825d67c3ea..0f698375ad6cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3050,8 +3050,9 @@ bool IRTranslator::translateCallBr(const User &U,
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
   // TODO: For most of the cases where there is an intrinsic callbr, we're
   // having exactly one indirect target, which will be unreachable. As soon as
-  // this changes, we might need to enhance Target->setIsInlineAsmBrIndirectTarget
-  // or add something similar for intrinsic indirect branches.
+  // this changes, we might need to enhance
+  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+  // intrinsic indirect branches.
   if (I.isInlineAsm()) {
     for (BasicBlock *Dest : I.getIndirectDests()) {
       MachineBasicBlock *Target = &getMBB(*Dest);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8d4f7c7e337a3..c9501128cd593 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3393,8 +3393,8 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
       report_fatal_error("Unsupported intrinsic for callbr");
     case Intrinsic::amdgcn_kill:
       if (I.getNumIndirectDests() != 1)
-          report_fatal_error(
-              "amdgcn.kill supportes exactly one indirect destination");
+        report_fatal_error(
+            "amdgcn.kill supportes exactly one indirect destination");
       CallInst *CI =
           CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
                            SmallVector<Value *, 1>(I.args()));
@@ -3416,8 +3416,9 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
   // TODO: For most of the cases where there is an intrinsic callbr, we're
   // having exactly one indirect target, which will be unreachable. As soon as
-  // this changes, we might need to enhance Target->setIsInlineAsmBrIndirectTarget
-  // or add something similar for intrinsic indirect branches.
+  // this changes, we might need to enhance
+  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+  // intrinsic indirect branches.
   if (I.isInlineAsm()) {
     for (BasicBlock *Dest : I.getIndirectDests()) {
       MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 5f2d0fcea4dcd..fbf6e087177c6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3256,8 +3256,8 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
             "Callbr amdgcn_kill only supports one indirect dest");
       bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
       CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
-      Check(Unreachable ||
-                (Call && Call->getIntrinsicID() == Intrinsic::amdgcn_unreachable),
+      Check(Unreachable || (Call && Call->getIntrinsicID() ==
+                                        Intrinsic::amdgcn_unreachable),
             "Callbr amdgcn_kill indirect dest needs to be unreachable");
       visitIntrinsicCall(Intrinsic::amdgcn_kill, CBI);
       break;

>From db57ce26f1780f285db04481f22c56d8262d60ba Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 2 Apr 2025 04:26:22 -0500
Subject: [PATCH 3/9] implement feedback

---
 llvm/include/llvm/CodeGen/Analysis.h          |  2 +-
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    | 18 ++---
 llvm/include/llvm/CodeGen/SelectionDAG.h      |  6 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |  6 +-
 llvm/include/llvm/IR/DiagnosticInfo.h         |  4 +-
 llvm/include/llvm/IR/IntrinsicInst.h          |  8 ++-
 llvm/lib/CodeGen/Analysis.cpp                 |  2 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 34 ++++-----
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  6 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 71 +++++++++----------
 .../SelectionDAG/SelectionDAGBuilder.h        | 61 ++++++++--------
 .../SelectionDAG/StatepointLowering.cpp       |  2 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  2 +-
 llvm/lib/IR/DiagnosticInfo.cpp                |  2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  4 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.h         |  3 +-
 .../Target/Hexagon/HexagonISelLowering.cpp    |  2 +-
 llvm/lib/Target/Hexagon/HexagonISelLowering.h |  2 +-
 .../LoongArch/LoongArchISelLowering.cpp       |  2 +-
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  7 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |  2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  7 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  9 ++-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |  2 +-
 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp   |  2 +-
 llvm/lib/Target/SPIRV/SPIRVISelLowering.h     |  2 +-
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  2 +-
 .../WebAssembly/WebAssemblyISelLowering.h     |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.h         |  2 +-
 36 files changed, 142 insertions(+), 150 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index 362cc30bbd06a..a4a604dd2c608 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -150,7 +150,7 @@ bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I,
 
 /// Returns true if the parent of \p CI returns CI's first argument after
 /// calling \p CI.
-bool funcReturnsFirstArgOfCall(const CallInst &CI);
+bool funcReturnsFirstArgOfCall(const CallBase &CI);
 
 DenseMap<const MachineBasicBlock *, int>
 getEHScopeMembership(const MachineFunction &MF);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 6fd05c8fddd5f..d3f731cf02be9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -235,26 +235,26 @@ class IRTranslator : public MachineFunctionPass {
   bool translateStore(const User &U, MachineIRBuilder &MIRBuilder);
 
   /// Translate an LLVM string intrinsic (memcpy, memset, ...).
-  bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
+  bool translateMemFunc(const CallBase &CI, MachineIRBuilder &MIRBuilder,
                         unsigned Opcode);
 
   /// Translate an LLVM trap intrinsic (trap, debugtrap, ubsantrap).
-  bool translateTrap(const CallInst &U, MachineIRBuilder &MIRBuilder,
+  bool translateTrap(const CallBase &U, MachineIRBuilder &MIRBuilder,
                      unsigned Opcode);
 
   // Translate @llvm.vector.interleave2 and
   // @llvm.vector.deinterleave2 intrinsics for fixed-width vector
   // types into vector shuffles.
-  bool translateVectorInterleave2Intrinsic(const CallInst &CI,
+  bool translateVectorInterleave2Intrinsic(const CallBase &CI,
                                            MachineIRBuilder &MIRBuilder);
-  bool translateVectorDeinterleave2Intrinsic(const CallInst &CI,
+  bool translateVectorDeinterleave2Intrinsic(const CallBase &CI,
                                              MachineIRBuilder &MIRBuilder);
 
   void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
 
-  bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
+  bool translateOverflowIntrinsic(const CallBase &CI, unsigned Op,
                                   MachineIRBuilder &MIRBuilder);
-  bool translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+  bool translateFixedPointIntrinsic(unsigned Op, const CallBase &CI,
                                     MachineIRBuilder &MIRBuilder);
 
   /// Helper function for translateSimpleIntrinsic.
@@ -265,13 +265,13 @@ class IRTranslator : public MachineFunctionPass {
 
   /// Translates the intrinsics defined in getSimpleIntrinsicOpcode.
   /// \return true if the translation succeeded.
-  bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+  bool translateSimpleIntrinsic(const CallBase &CI, Intrinsic::ID ID,
                                 MachineIRBuilder &MIRBuilder);
 
   bool translateConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI,
                                        MachineIRBuilder &MIRBuilder);
 
-  bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+  bool translateKnownIntrinsic(const CallBase &CI, Intrinsic::ID ID,
                                MachineIRBuilder &MIRBuilder);
 
   /// Returns the single livein physical register Arg was lowered to, if
@@ -588,7 +588,7 @@ class IRTranslator : public MachineFunctionPass {
     return false;
   }
 
-  bool translateConvergenceControlIntrinsic(const CallInst &CI,
+  bool translateConvergenceControlIntrinsic(const CallBase &CI,
                                             Intrinsic::ID ID,
                                             MachineIRBuilder &MIRBuilder);
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 15a2370e5d8b8..c690b4106900d 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1205,7 +1205,7 @@ class SelectionDAG {
    * the tail call optimization decision. */
   SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                     SDValue Size, Align Alignment, bool isVol,
-                    bool AlwaysInline, const CallInst *CI,
+                    bool AlwaysInline, const CallBase *CI,
                     std::optional<bool> OverrideTailCall,
                     MachinePointerInfo DstPtrInfo,
                     MachinePointerInfo SrcPtrInfo,
@@ -1217,7 +1217,7 @@ class SelectionDAG {
    * the tail call optimization decision. */
   SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                      SDValue Size, Align Alignment, bool isVol,
-                     const CallInst *CI, std::optional<bool> OverrideTailCall,
+                     const CallBase *CI, std::optional<bool> OverrideTailCall,
                      MachinePointerInfo DstPtrInfo,
                      MachinePointerInfo SrcPtrInfo,
                      const AAMDNodes &AAInfo = AAMDNodes(),
@@ -1225,7 +1225,7 @@ class SelectionDAG {
 
   SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                     SDValue Size, Align Alignment, bool isVol,
-                    bool AlwaysInline, const CallInst *CI,
+                    bool AlwaysInline, const CallBase *CI,
                     MachinePointerInfo DstPtrInfo,
                     const AAMDNodes &AAInfo = AAMDNodes());
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 58ac87206b9a6..fd7a5b1442d3f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1238,7 +1238,7 @@ class TargetLoweringBase {
   /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
   /// true and store the intrinsic information into the IntrinsicInfo that was
   /// passed to the function.
-  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &,
                                   MachineFunction &,
                                   unsigned /*Intrinsic*/) const {
     return false;
@@ -5149,9 +5149,9 @@ class TargetLowering : public TargetLoweringBase {
                                               const AsmOperandInfo &OpInfo,
                                               SelectionDAG &DAG) const;
 
-  // Targets may override this function to collect operands from the CallInst
+  // Targets may override this function to collect operands from the CallBase
   // and for example, lower them into the SelectionDAG operands.
-  virtual void CollectTargetIntrinsicOperands(const CallInst &I,
+  virtual void CollectTargetIntrinsicOperands(const CallBase &I,
                                               SmallVectorImpl<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
 
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 779c88993b71c..9b6998397b944 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -38,7 +38,7 @@ namespace llvm {
 class DiagnosticPrinter;
 class DIFile;
 class DISubprogram;
-class CallInst;
+class CallBase;
 class Function;
 class Instruction;
 class InstructionCost;
@@ -1151,7 +1151,7 @@ class DiagnosticInfoSrcMgr : public DiagnosticInfo {
   }
 };
 
-void diagnoseDontCall(const CallInst &CI);
+void diagnoseDontCall(const CallBase &CI);
 
 class DiagnosticInfoDontCall : public DiagnosticInfo {
   StringRef CalleeName;
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 93750d6e3845e..7a760781d2b96 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -134,8 +134,14 @@ class IntrinsicInst : public CallInst {
       return CF->isIntrinsic();
     return false;
   }
+  static bool classof(const CallBase *I) {
+    if (const Function *CF = I->getCalledFunction())
+      return CF->isIntrinsic();
+    return false;
+  }
   static bool classof(const Value *V) {
-    return isa<CallInst>(V) && classof(cast<CallInst>(V));
+    return (isa<CallInst>(V) && classof(cast<CallInst>(V))) ||
+           (isa<CallBase>(V) && classof(cast<CallBase>(V)));
   }
 };
 
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index e7b9417de8c9f..f950bb756fb2b 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -712,7 +712,7 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
   return true;
 }
 
-bool llvm::funcReturnsFirstArgOfCall(const CallInst &CI) {
+bool llvm::funcReturnsFirstArgOfCall(const CallBase &CI) {
   const ReturnInst *Ret = dyn_cast<ReturnInst>(CI.getParent()->getTerminator());
   Value *RetVal = Ret ? Ret->getReturnValue() : nullptr;
   bool ReturnsFirstArg = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 0f698375ad6cf..9323966d9ada6 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1694,7 +1694,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
   return true;
 }
 
-bool IRTranslator::translateMemFunc(const CallInst &CI,
+bool IRTranslator::translateMemFunc(const CallBase &CI,
                                     MachineIRBuilder &MIRBuilder,
                                     unsigned Opcode) {
   const Value *SrcPtr = CI.getArgOperand(1);
@@ -1785,7 +1785,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
   return true;
 }
 
-bool IRTranslator::translateTrap(const CallInst &CI,
+bool IRTranslator::translateTrap(const CallBase &CI,
                                  MachineIRBuilder &MIRBuilder,
                                  unsigned Opcode) {
   StringRef TrapFuncName =
@@ -1812,7 +1812,7 @@ bool IRTranslator::translateTrap(const CallInst &CI,
 }
 
 bool IRTranslator::translateVectorInterleave2Intrinsic(
-    const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+    const CallBase &CI, MachineIRBuilder &MIRBuilder) {
   assert(CI.getIntrinsicID() == Intrinsic::vector_interleave2 &&
          "This function can only be called on the interleave2 intrinsic!");
   // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
@@ -1828,7 +1828,7 @@ bool IRTranslator::translateVectorInterleave2Intrinsic(
 }
 
 bool IRTranslator::translateVectorDeinterleave2Intrinsic(
-    const CallInst &CI, MachineIRBuilder &MIRBuilder) {
+    const CallBase &CI, MachineIRBuilder &MIRBuilder) {
   assert(CI.getIntrinsicID() == Intrinsic::vector_deinterleave2 &&
          "This function can only be called on the deinterleave2 intrinsic!");
   // Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
@@ -1868,7 +1868,7 @@ void IRTranslator::getStackGuard(Register DstReg,
   MIB.setMemRefs({MemRef});
 }
 
-bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
+bool IRTranslator::translateOverflowIntrinsic(const CallBase &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
   ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
   MIRBuilder.buildInstr(
@@ -1878,7 +1878,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
   return true;
 }
 
-bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallBase &CI,
                                                 MachineIRBuilder &MIRBuilder) {
   Register Dst = getOrCreateVReg(CI);
   Register Src0 = getOrCreateVReg(*CI.getOperand(0));
@@ -2023,7 +2023,7 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
   return Intrinsic::not_intrinsic;
 }
 
-bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
+bool IRTranslator::translateSimpleIntrinsic(const CallBase &CI,
                                             Intrinsic::ID ID,
                                             MachineIRBuilder &MIRBuilder) {
 
@@ -2145,7 +2145,7 @@ static unsigned getConvOpcode(Intrinsic::ID ID) {
 }
 
 bool IRTranslator::translateConvergenceControlIntrinsic(
-    const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
+    const CallBase &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
   MachineInstrBuilder MIB = MIRBuilder.buildInstr(getConvOpcode(ID));
   Register OutputReg = getOrCreateConvergenceTokenVReg(CI);
   MIB.addDef(OutputReg);
@@ -2161,7 +2161,7 @@ bool IRTranslator::translateConvergenceControlIntrinsic(
   return true;
 }
 
-bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+bool IRTranslator::translateKnownIntrinsic(const CallBase &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
   if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
     if (ORE->enabled()) {
@@ -2756,7 +2756,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (containsBF16Type(U))
     return false;
 
-  const CallInst &CI = cast<CallInst>(U);
+  const CallBase &CI = cast<CallBase>(U);
   const Function *F = CI.getCalledFunction();
 
   // FIXME: support Windows dllimport function calls and calls through
@@ -3023,22 +3023,14 @@ bool IRTranslator::translateCallBr(const User &U,
   } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
     switch (I.getIntrinsicID()) {
     default:
-      report_fatal_error("Unsupported intrinsic for callbr");
+      return false;
     case Intrinsic::amdgcn_kill:
-      if (I.getNumIndirectDests() != 1)
-        report_fatal_error(
-            "amdgcn.kill supportes exactly one indirect destination");
-      CallInst *CI =
-          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
-                           SmallVector<Value *, 1>(I.args()));
-      bool Success = translateCall(*CI, MIRBuilder);
-      CI->deleteValue();
-      if (!Success)
+      if (!translateCall(I, MIRBuilder))
         return false;
       break;
     }
   } else {
-    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+    return false;
   }
 
   // Retrieve successors.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7ce4eebf685e1..7b75e43e0b08e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8518,7 +8518,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
 
 SDValue SelectionDAG::getMemcpy(
     SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size,
-    Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI,
+    Align Alignment, bool isVol, bool AlwaysInline, const CallBase *CI,
     std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo,
     MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo,
     BatchAAResults *BatchAA) {
@@ -8644,7 +8644,7 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
 
 SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                  SDValue Src, SDValue Size, Align Alignment,
-                                 bool isVol, const CallInst *CI,
+                                 bool isVol, const CallBase *CI,
                                  std::optional<bool> OverrideTailCall,
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo,
@@ -8762,7 +8762,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
 SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                 SDValue Src, SDValue Size, Align Alignment,
                                 bool isVol, bool AlwaysInline,
-                                const CallInst *CI,
+                                const CallBase *CI,
                                 MachinePointerInfo DstPtrInfo,
                                 const AAMDNodes &AAInfo) {
   // Check to see if we should lower the memset to stores first.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c9501128cd593..ebca27d79406e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3392,14 +3392,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
     default:
       report_fatal_error("Unsupported intrinsic for callbr");
     case Intrinsic::amdgcn_kill:
-      if (I.getNumIndirectDests() != 1)
-        report_fatal_error(
-            "amdgcn.kill supportes exactly one indirect destination");
-      CallInst *CI =
-          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
-                           SmallVector<Value *, 1>(I.args()));
-      visitCall(*CI);
-      CI->deleteValue();
+      visitCallBase(I);
       break;
     }
   } else {
@@ -4758,7 +4751,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   DAG.setRoot(StoreNode);
 }
 
-void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
+void SelectionDAGBuilder::visitMaskedStore(const CallBase &I,
                                            bool IsCompressing) {
   SDLoc sdl = getCurSDLoc();
 
@@ -4889,7 +4882,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
   return true;
 }
 
-void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
+void SelectionDAGBuilder::visitMaskedScatter(const CallBase &I) {
   SDLoc sdl = getCurSDLoc();
 
   // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
@@ -4934,7 +4927,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
   setValue(&I, Scatter);
 }
 
-void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
+void SelectionDAGBuilder::visitMaskedLoad(const CallBase &I, bool IsExpanding) {
   SDLoc sdl = getCurSDLoc();
 
   auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
@@ -5003,7 +4996,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   setValue(&I, Res);
 }
 
-void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
+void SelectionDAGBuilder::visitMaskedGather(const CallBase &I) {
   SDLoc sdl = getCurSDLoc();
 
   // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
@@ -5233,7 +5226,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
 
 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
 /// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallBase &I,
                                                unsigned Intrinsic) {
   // Ignore the callsite's attributes. A specific call site may be marked with
   // readnone, but the lowering code will expect the chain based on the
@@ -6309,7 +6302,7 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue(
 }
 
 /// Lower the call to the specified intrinsic function.
-void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
+void SelectionDAGBuilder::visitConvergenceControl(const CallBase &I,
                                                   unsigned Intrinsic) {
   SDLoc sdl = getCurSDLoc();
   switch (Intrinsic) {
@@ -6329,7 +6322,7 @@ void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
   }
 }
 
-void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
+void SelectionDAGBuilder::visitVectorHistogram(const CallBase &I,
                                                unsigned IntrinsicID) {
   // For now, we're only lowering an 'add' histogram.
   // We can add others later, e.g. saturating adds, min/max.
@@ -6387,7 +6380,7 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
   DAG.setRoot(Histogram);
 }
 
-void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
+void SelectionDAGBuilder::visitVectorExtractLastActive(const CallBase &I,
                                                        unsigned Intrinsic) {
   assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
          "Tried lowering invalid vector extract last");
@@ -6415,7 +6408,7 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
 }
 
 /// Lower the call to the specified intrinsic function.
-void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+void SelectionDAGBuilder::visitIntrinsicCall(const CallBase &I,
                                              unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDLoc sdl = getCurSDLoc();
@@ -9047,7 +9040,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallBase &I) {
   const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
   const Value *Size = I.getArgOperand(2);
   const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
@@ -9139,7 +9132,7 @@ bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemChrCall(const CallBase &I) {
   const Value *Src = I.getArgOperand(0);
   const Value *Char = I.getArgOperand(1);
   const Value *Length = I.getArgOperand(2);
@@ -9163,7 +9156,7 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitMemPCpyCall(const CallBase &I) {
   SDValue Dst = getValue(I.getArgOperand(0));
   SDValue Src = getValue(I.getArgOperand(1));
   SDValue Size = getValue(I.getArgOperand(2));
@@ -9202,7 +9195,7 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
+bool SelectionDAGBuilder::visitStrCpyCall(const CallBase &I, bool isStpcpy) {
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9225,7 +9218,7 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitStrCmpCall(const CallBase &I) {
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9248,7 +9241,7 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitStrLenCall(const CallBase &I) {
   const Value *Arg0 = I.getArgOperand(0);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9269,7 +9262,7 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
+bool SelectionDAGBuilder::visitStrNLenCall(const CallBase &I) {
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9291,7 +9284,7 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
 /// false and it will be lowered like a normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallBase &I,
                                               unsigned Opcode) {
   // We already checked this call's prototype; verify it doesn't modify errno.
   if (!I.onlyReadsMemory())
@@ -9311,7 +9304,7 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
 /// false, and it will be lowered like a normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallBase &I,
                                                unsigned Opcode) {
   // We already checked this call's prototype; verify it doesn't modify errno.
   if (!I.onlyReadsMemory())
@@ -9327,7 +9320,9 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
   return true;
 }
 
-void SelectionDAGBuilder::visitCall(const CallInst &I) {
+void SelectionDAGBuilder::visitCall(const CallInst &I) { visitCallBase(I); }
+
+void SelectionDAGBuilder::visitCallBase(const CallBase &I) {
   // Handle inline assembly differently.
   if (I.isInlineAsm()) {
     visitInlineAsm(I);
@@ -10490,7 +10485,7 @@ void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
   setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
 }
 
-void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+void SelectionDAGBuilder::visitVAStart(const CallBase &I) {
   DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getArgOperand(0)),
@@ -10512,14 +10507,14 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   setValue(&I, V);
 }
 
-void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+void SelectionDAGBuilder::visitVAEnd(const CallBase &I) {
   DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getArgOperand(0)),
                           DAG.getSrcValue(I.getArgOperand(0))));
 }
 
-void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+void SelectionDAGBuilder::visitVACopy(const CallBase &I) {
   DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getArgOperand(0)),
@@ -10637,7 +10632,7 @@ static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
 }
 
 /// Lower llvm.experimental.stackmap.
-void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+void SelectionDAGBuilder::visitStackmap(const CallBase &CI) {
   // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
   //                                  [live variables...])
 
@@ -10849,7 +10844,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
   FuncInfo.MF->getFrameInfo().setHasPatchPoint();
 }
 
-void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
+void SelectionDAGBuilder::visitVectorReduce(const CallBase &I,
                                             unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Op1 = getValue(I.getArgOperand(0));
@@ -12524,14 +12519,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   }
 }
 
-void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+void SelectionDAGBuilder::visitStepVector(const CallBase &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   auto DL = getCurSDLoc();
   EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
   setValue(&I, DAG.getStepVector(DL, ResultVT));
 }
 
-void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
+void SelectionDAGBuilder::visitVectorReverse(const CallBase &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
@@ -12554,7 +12549,7 @@ void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
   setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
 }
 
-void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I,
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallBase &I,
                                                   unsigned Factor) {
   auto DL = getCurSDLoc();
   SDValue InVec = getValue(I.getOperand(0));
@@ -12590,7 +12585,7 @@ void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I,
   setValue(&I, Res);
 }
 
-void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I,
+void SelectionDAGBuilder::visitVectorInterleave(const CallBase &I,
                                                 unsigned Factor) {
   auto DL = getCurSDLoc();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -12644,7 +12639,7 @@ void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
                            DAG.getVTList(ValueVTs), Values));
 }
 
-void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
+void SelectionDAGBuilder::visitVectorSplice(const CallBase &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
@@ -12718,7 +12713,7 @@ static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
 //   setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
 // otherwise we will end up with copies of virtregs only valid along direct
 // edges.
-void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
+void SelectionDAGBuilder::visitCallBrLandingPad(const CallBase &I) {
   SmallVector<EVT, 8> ResultVTs;
   SmallVector<SDValue, 8> ResultValues;
   const auto *CBR =
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 35c15bc269d4b..483e0b9d2db6c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -483,7 +483,7 @@ class SelectionDAGBuilder {
   void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
                                     const BasicBlock *EHPadBB);
 
-  void LowerDeoptimizeCall(const CallInst *CI);
+  void LowerDeoptimizeCall(const CallBase *CI);
   void LowerDeoptimizingReturn();
 
   void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
@@ -537,7 +537,7 @@ class SelectionDAGBuilder {
   // These all get lowered before this pass.
   void visitInvoke(const InvokeInst &I);
   void visitCallBr(const CallBrInst &I);
-  void visitCallBrLandingPad(const CallInst &I);
+  void visitCallBrLandingPad(const CallBase &I);
   void visitResume(const ResumeInst &I);
 
   void visitUnary(const User &I, unsigned Opcode);
@@ -594,24 +594,25 @@ class SelectionDAGBuilder {
   void visitAlloca(const AllocaInst &I);
   void visitLoad(const LoadInst &I);
   void visitStore(const StoreInst &I);
-  void visitMaskedLoad(const CallInst &I, bool IsExpanding = false);
-  void visitMaskedStore(const CallInst &I, bool IsCompressing = false);
-  void visitMaskedGather(const CallInst &I);
-  void visitMaskedScatter(const CallInst &I);
+  void visitMaskedLoad(const CallBase &I, bool IsExpanding = false);
+  void visitMaskedStore(const CallBase &I, bool IsCompressing = false);
+  void visitMaskedGather(const CallBase &I);
+  void visitMaskedScatter(const CallBase &I);
   void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
   void visitAtomicRMW(const AtomicRMWInst &I);
   void visitFence(const FenceInst &I);
   void visitPHI(const PHINode &I);
   void visitCall(const CallInst &I);
-  bool visitMemCmpBCmpCall(const CallInst &I);
-  bool visitMemPCpyCall(const CallInst &I);
-  bool visitMemChrCall(const CallInst &I);
-  bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
-  bool visitStrCmpCall(const CallInst &I);
-  bool visitStrLenCall(const CallInst &I);
-  bool visitStrNLenCall(const CallInst &I);
-  bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
-  bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
+  void visitCallBase(const CallBase &I);
+  bool visitMemCmpBCmpCall(const CallBase &I);
+  bool visitMemPCpyCall(const CallBase &I);
+  bool visitMemChrCall(const CallBase &I);
+  bool visitStrCpyCall(const CallBase &I, bool isStpcpy);
+  bool visitStrCmpCall(const CallBase &I);
+  bool visitStrLenCall(const CallBase &I);
+  bool visitStrNLenCall(const CallBase &I);
+  bool visitUnaryFloatCall(const CallBase &I, unsigned Opcode);
+  bool visitBinaryFloatCall(const CallBase &I, unsigned Opcode);
   void visitAtomicLoad(const LoadInst &I);
   void visitAtomicStore(const StoreInst &I);
   void visitLoadFromSwiftError(const LoadInst &I);
@@ -624,12 +625,12 @@ class SelectionDAGBuilder {
   bool visitEntryValueDbgValue(ArrayRef<const Value *> Values,
                                DILocalVariable *Variable, DIExpression *Expr,
                                DebugLoc DbgLoc);
-  void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
-  void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+  void visitIntrinsicCall(const CallBase &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(const CallBase &I, unsigned Intrinsic);
   void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
-  void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
-  void visitVectorHistogram(const CallInst &I, unsigned IntrinsicID);
-  void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
+  void visitConvergenceControl(const CallBase &I, unsigned Intrinsic);
+  void visitVectorHistogram(const CallBase &I, unsigned IntrinsicID);
+  void visitVectorExtractLastActive(const CallBase &I, unsigned Intrinsic);
   void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
                    const SmallVectorImpl<SDValue> &OpValues);
   void visitVPStore(const VPIntrinsic &VPIntrin,
@@ -645,23 +646,23 @@ class SelectionDAGBuilder {
   void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
   void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
 
-  void visitVAStart(const CallInst &I);
+  void visitVAStart(const CallBase &I);
   void visitVAArg(const VAArgInst &I);
-  void visitVAEnd(const CallInst &I);
-  void visitVACopy(const CallInst &I);
-  void visitStackmap(const CallInst &I);
+  void visitVAEnd(const CallBase &I);
+  void visitVACopy(const CallBase &I);
+  void visitStackmap(const CallBase &I);
   void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr);
 
   // These two are implemented in StatepointLowering.cpp
   void visitGCRelocate(const GCRelocateInst &Relocate);
   void visitGCResult(const GCResultInst &I);
 
-  void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
-  void visitVectorReverse(const CallInst &I);
-  void visitVectorSplice(const CallInst &I);
-  void visitVectorInterleave(const CallInst &I, unsigned Factor);
-  void visitVectorDeinterleave(const CallInst &I, unsigned Factor);
-  void visitStepVector(const CallInst &I);
+  void visitVectorReduce(const CallBase &I, unsigned Intrinsic);
+  void visitVectorReverse(const CallBase &I);
+  void visitVectorSplice(const CallBase &I);
+  void visitVectorInterleave(const CallBase &I, unsigned Factor);
+  void visitVectorDeinterleave(const CallBase &I, unsigned Factor);
+  void visitStepVector(const CallBase &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 80aeefe8e068a..5227fd6a60e7f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1303,7 +1303,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
   setValue(&Relocate, SD);
 }
 
-void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
+void SelectionDAGBuilder::LowerDeoptimizeCall(const CallBase *CI) {
   const auto &TLI = DAG.getTargetLoweringInfo();
   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE),
                                          TLI.getPointerTy(DAG.getDataLayout()));
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f1649a3903fac..72e62e17a327a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5655,7 +5655,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 }
 
 void TargetLowering::CollectTargetIntrinsicOperands(
-    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+    const CallBase &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
 }
 
 std::pair<unsigned, const TargetRegisterClass *>
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 0e526ada4b405..83b2f71c28185 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -456,7 +456,7 @@ void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const {
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
 
-void llvm::diagnoseDontCall(const CallInst &CI) {
+void llvm::diagnoseDontCall(const CallBase &CI) {
   const auto *F =
       dyn_cast<Function>(CI.getCalledOperand()->stripPointerCasts());
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1c8e3afdfd718..3f628dbf49abb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16373,7 +16373,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
 template <unsigned NumVecs>
 static bool
 setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
-              AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
+              AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI) {
   Info.opc = ISD::INTRINSIC_VOID;
   // Retrieve EC from first vector argument.
   const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
@@ -16398,7 +16398,7 @@ setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                               const CallInst &I,
+                                               const CallBase &I,
                                                MachineFunction &MF,
                                                unsigned Intrinsic) const {
   auto &DL = I.getDataLayout();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index bc0c3a832bb28..e1fadc641ebdb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -701,7 +701,7 @@ class AArch64TargetLowering : public TargetLowering {
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *MBB) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b0c18715ef810..b42861147eb75 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1210,7 +1210,7 @@ MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
 }
 
 bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                          const CallInst &CI,
+                                          const CallBase &CI,
                                           MachineFunction &MF,
                                           unsigned IntrID) const {
   Info.flags = MachineMemOperand::MONone;
@@ -1496,7 +1496,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 }
 
 void SITargetLowering::CollectTargetIntrinsicOperands(
-    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+    const CallBase &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
   switch (cast<IntrinsicInst>(I).getIntrinsicID()) {
   case Intrinsic::amdgcn_addrspacecast_nonnull: {
     // The DAG's ValueType loses the addrspaces.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 8e4717a3f64ab..6d7f2d16a458e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -310,11 +310,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
   MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
+  bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &,
                           MachineFunction &MF,
                           unsigned IntrinsicID) const override;
 
-  void CollectTargetIntrinsicOperands(const CallInst &I,
+  void CollectTargetIntrinsicOperands(const CallBase &I,
                                       SmallVectorImpl<SDValue> &Ops,
                                       SelectionDAG &DAG) const override;
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d2f9ec982ae01..f72070521b940 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20953,7 +20953,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallInst &I,
+                                           const CallBase &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 9fad056edd3f1..883dbaa49d2ed 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -616,8 +616,7 @@ class VectorType;
     bool isFPImmLegal(const APFloat &Imm, EVT VT,
                       bool ForCodeSize = false) const override;
 
-    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
-                            const CallInst &I,
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1710488e4e292..ab6f0d5588e8d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2079,7 +2079,7 @@ static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
 /// true and store the intrinsic information into the IntrinsicInfo that was
 /// passed to the function.
 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                               const CallInst &I,
+                                               const CallBase &I,
                                                MachineFunction &MF,
                                                unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 4df88b3a8abd7..2047c905fefd0 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -142,7 +142,7 @@ class HexagonTargetLowering : public TargetLowering {
       const SmallVectorImpl<SDValue> &OutVals,
       const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 900775eedfa7b..46c416255fca4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6043,7 +6043,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
 }
 
 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                                 const CallInst &I,
+                                                 const CallBase &I,
                                                  MachineFunction &MF,
                                                  unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 002fad0e20759..9021c09b6bc25 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -212,7 +212,7 @@ class LoongArchTargetLowering : public TargetLowering {
                                           Value *NewVal, Value *Mask,
                                           AtomicOrdering Ord) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 06e221777b7ea..a223190bf9f11 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3632,9 +3632,10 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint(
 // because we need the information that is only available in the "Value" type
 // of destination
 // pointer. In particular, the address space information.
-bool NVPTXTargetLowering::getTgtMemIntrinsic(
-    IntrinsicInfo &Info, const CallInst &I,
-    MachineFunction &MF, unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                             const CallBase &I,
+                                             MachineFunction &MF,
+                                             unsigned Intrinsic) const {
   switch (Intrinsic) {
   default:
     return false;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 39470be254efa..74ad5ef704c0d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -118,7 +118,7 @@ class NVPTXTargetLowering : public TargetLowering {
 
   const char *getTargetNodeName(unsigned Opcode) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ab78f33f5a630..0066ac5d41b8c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17478,9 +17478,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
-void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
-                                              SmallVectorImpl<SDValue> &Ops,
-                                              SelectionDAG &DAG) const {
+void PPCTargetLowering::CollectTargetIntrinsicOperands(
+    const CallBase &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
   if (I.getNumOperands() <= 1)
     return;
   if (!isa<ConstantSDNode>(Ops[1].getNode()))
@@ -17668,7 +17667,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 }
 
 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallInst &I,
+                                           const CallBase &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 1f22aa16a89be..64ef67923ef0e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1008,9 +1008,9 @@ namespace llvm {
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
 
-    void CollectTargetIntrinsicOperands(const CallInst &I,
-                                 SmallVectorImpl<SDValue> &Ops,
-                                 SelectionDAG &DAG) const override;
+    void CollectTargetIntrinsicOperands(const CallBase &I,
+                                        SmallVectorImpl<SDValue> &Ops,
+                                        SelectionDAG &DAG) const override;
 
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
@@ -1069,8 +1069,7 @@ namespace llvm {
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
-    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
-                            const CallInst &I,
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5b5dca4b541df..00cd0c5c87163 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1644,7 +1644,7 @@ bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
 }
 
 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                             const CallInst &I,
+                                             const CallBase &I,
                                              MachineFunction &MF,
                                              unsigned Intrinsic) const {
   auto &DL = I.getDataLayout();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f4d6cd86397a4..cc401a052c7f2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -517,7 +517,7 @@ class RISCVTargetLowering : public TargetLowering {
 
   const RISCVSubtarget &getSubtarget() const { return Subtarget; }
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index c347dde89256f..68bdab2da0e33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -55,7 +55,7 @@ MVT SPIRVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
 }
 
 bool SPIRVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                             const CallInst &I,
+                                             const CallBase &I,
                                              MachineFunction &MF,
                                              unsigned Intrinsic) const {
   unsigned AlignIdx = 3;
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
index eb78299b72f04..9e23ae3587fae 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
@@ -49,7 +49,7 @@ class SPIRVTargetLowering : public TargetLowering {
                                          EVT VT) const override;
   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                     EVT VT) const override;
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 9ae46e709d823..0f8244dc50107 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1033,7 +1033,7 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
 }
 
 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                                   const CallInst &I,
+                                                   const CallBase &I,
                                                    MachineFunction &MF,
                                                    unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 90d31e38a7076..a077fa6cbf7ea 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -72,7 +72,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b128a6dadbbb6..a5c299d3e682e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3101,7 +3101,7 @@ static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
 }
 
 bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallInst &I,
+                                           const CallBase &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   Info.flags = MachineMemOperand::MONone;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 4a2b35e9efe7c..1c776db401562 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1467,7 +1467,7 @@ namespace llvm {
     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
     /// true and stores the intrinsic information into the IntrinsicInfo that was
     /// passed to the function.
-    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;
 

>From 46f44758efdb6d7829a0c200ca4c2bf3fc290a4f Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 2 Apr 2025 05:04:29 -0500
Subject: [PATCH 4/9] adapt and more tests

---
 llvm/test/Assembler/callbr.ll      | 22 +++++++++++++++++
 llvm/test/CodeGen/AMDGPU/callbr.ll | 22 ++---------------
 llvm/test/Verifier/callbr.ll       | 39 ++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/Assembler/callbr.ll

diff --git a/llvm/test/Assembler/callbr.ll b/llvm/test/Assembler/callbr.ll
new file mode 100644
index 0000000000000..0084e9763c62c
--- /dev/null
+++ b/llvm/test/Assembler/callbr.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s | FileCheck %s
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare void @llvm.amdgcn.kill(i1)
+
+define void @test_kill(i1 %c) {
+; CHECK-LABEL: define void @test_kill(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT:    callbr void @llvm.amdgcn.kill(i1 [[C]])
+; CHECK-NEXT:            to label %[[CONT:.*]] [label %kill]
+; CHECK:       [[KILL:.*:]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[CONT]]:
+; CHECK-NEXT:    ret void
+;
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  unreachable
+cont:
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
index e2e84dca96cbf..33c75f6cf5aab 100644
--- a/llvm/test/CodeGen/AMDGPU/callbr.ll
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -1,14 +1,5 @@
-; RUN: rm -rf %t && split-file %s %t
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-seldag.s < %t/with-callbr.ll
-; RUN: FileCheck --check-prefix=SELDAG %s < %t/with-callbr-seldag.s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-gisel.s -global-isel < %t/with-callbr.ll
-; RUN: FileCheck --check-prefix=GISEL %s < %t/with-callbr-gisel.s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-seldag.s < %t/without-callbr.ll
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-gisel.s -global-isel < %t/without-callbr.ll
-; RUN: diff %t/with-callbr-seldag.s %t/without-callbr-seldag.s
-; RUN: diff %t/with-callbr-gisel.s %t/without-callbr-gisel.s
-
-;--- with-callbr.ll
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o - < %s | FileCheck --check-prefix=SELDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o - -global-isel < %s | FileCheck --check-prefix=GISEL %s
 
 ; SELDAG-LABEL: test_kill:
 ; SELDAG-NEXT:  ; %bb.0:
@@ -59,12 +50,3 @@ cont:
   store i32 %a, ptr %dst, align 4
   ret void
 }
-
-;--- without-callbr.ll
-
-define void @test_kill(ptr %src, ptr %dst, i1 %c) {
-  %a = load i32, ptr %src, align 4
-  call void @llvm.amdgcn.kill(i1 %c)
-  store i32 %a, ptr %dst, align 4
-  ret void
-}
diff --git a/llvm/test/Verifier/callbr.ll b/llvm/test/Verifier/callbr.ll
index 9b819c5fed48b..8e125e723e6fc 100644
--- a/llvm/test/Verifier/callbr.ll
+++ b/llvm/test/Verifier/callbr.ll
@@ -120,3 +120,42 @@ landingpad:
   %out = call i32 @llvm.callbr.landingpad.i32(i32 %0)
   ret i32 %out
 }
+
+declare void @llvm.amdgcn.kill(i1)
+
+; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest
+define void @test_callbr_intrinsic_indirect0(i1 %c) {
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont []
+kill:
+  unreachable
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest
+define void @test_callbr_intrinsic_indirect2(i1 %c) {
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill1, label %kill2]
+kill1:
+  unreachable
+kill2:
+  unreachable
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr amdgcn_kill indirect dest needs to be unreachable
+define void @test_callbr_intrinsic_no_unreachable(i1 %c) {
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  ret void
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr currently only supports asm-goto and selected intrinsics
+declare i32 @llvm.amdgcn.workitem.id.x() 
+define void @test_callbr_intrinsic_unsupported() {
+  callbr i32 @llvm.amdgcn.workitem.id.x() to label %cont []
+cont:
+  ret void
+}

>From 20b323dfbd9ba5e88aa616af68af558bb99edd9a Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 3 Apr 2025 02:44:58 -0500
Subject: [PATCH 5/9] Revert "implement feedback"

This reverts commit db57ce26f1780f285db04481f22c56d8262d60ba.
---
 llvm/include/llvm/CodeGen/Analysis.h          |  2 +-
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    | 18 ++---
 llvm/include/llvm/CodeGen/SelectionDAG.h      |  6 +-
 llvm/include/llvm/CodeGen/TargetLowering.h    |  6 +-
 llvm/include/llvm/IR/DiagnosticInfo.h         |  4 +-
 llvm/include/llvm/IR/IntrinsicInst.h          |  8 +--
 llvm/lib/CodeGen/Analysis.cpp                 |  2 +-
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 34 +++++----
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  6 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 71 ++++++++++---------
 .../SelectionDAG/SelectionDAGBuilder.h        | 61 ++++++++--------
 .../SelectionDAG/StatepointLowering.cpp       |  2 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  2 +-
 llvm/lib/IR/DiagnosticInfo.cpp                |  2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  4 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  4 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.h         |  3 +-
 .../Target/Hexagon/HexagonISelLowering.cpp    |  2 +-
 llvm/lib/Target/Hexagon/HexagonISelLowering.h |  2 +-
 .../LoongArch/LoongArchISelLowering.cpp       |  2 +-
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  7 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h     |  2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  7 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |  9 +--
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |  2 +-
 llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp   |  2 +-
 llvm/lib/Target/SPIRV/SPIRVISelLowering.h     |  2 +-
 .../WebAssembly/WebAssemblyISelLowering.cpp   |  2 +-
 .../WebAssembly/WebAssemblyISelLowering.h     |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  2 +-
 llvm/lib/Target/X86/X86ISelLowering.h         |  2 +-
 36 files changed, 150 insertions(+), 142 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index a4a604dd2c608..362cc30bbd06a 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -150,7 +150,7 @@ bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I,
 
 /// Returns true if the parent of \p CI returns CI's first argument after
 /// calling \p CI.
-bool funcReturnsFirstArgOfCall(const CallBase &CI);
+bool funcReturnsFirstArgOfCall(const CallInst &CI);
 
 DenseMap<const MachineBasicBlock *, int>
 getEHScopeMembership(const MachineFunction &MF);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index d3f731cf02be9..6fd05c8fddd5f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -235,26 +235,26 @@ class IRTranslator : public MachineFunctionPass {
   bool translateStore(const User &U, MachineIRBuilder &MIRBuilder);
 
   /// Translate an LLVM string intrinsic (memcpy, memset, ...).
-  bool translateMemFunc(const CallBase &CI, MachineIRBuilder &MIRBuilder,
+  bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder,
                         unsigned Opcode);
 
   /// Translate an LLVM trap intrinsic (trap, debugtrap, ubsantrap).
-  bool translateTrap(const CallBase &U, MachineIRBuilder &MIRBuilder,
+  bool translateTrap(const CallInst &U, MachineIRBuilder &MIRBuilder,
                      unsigned Opcode);
 
   // Translate @llvm.vector.interleave2 and
   // @llvm.vector.deinterleave2 intrinsics for fixed-width vector
   // types into vector shuffles.
-  bool translateVectorInterleave2Intrinsic(const CallBase &CI,
+  bool translateVectorInterleave2Intrinsic(const CallInst &CI,
                                            MachineIRBuilder &MIRBuilder);
-  bool translateVectorDeinterleave2Intrinsic(const CallBase &CI,
+  bool translateVectorDeinterleave2Intrinsic(const CallInst &CI,
                                              MachineIRBuilder &MIRBuilder);
 
   void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder);
 
-  bool translateOverflowIntrinsic(const CallBase &CI, unsigned Op,
+  bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                   MachineIRBuilder &MIRBuilder);
-  bool translateFixedPointIntrinsic(unsigned Op, const CallBase &CI,
+  bool translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
                                     MachineIRBuilder &MIRBuilder);
 
   /// Helper function for translateSimpleIntrinsic.
@@ -265,13 +265,13 @@ class IRTranslator : public MachineFunctionPass {
 
   /// Translates the intrinsics defined in getSimpleIntrinsicOpcode.
   /// \return true if the translation succeeded.
-  bool translateSimpleIntrinsic(const CallBase &CI, Intrinsic::ID ID,
+  bool translateSimpleIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                 MachineIRBuilder &MIRBuilder);
 
   bool translateConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI,
                                        MachineIRBuilder &MIRBuilder);
 
-  bool translateKnownIntrinsic(const CallBase &CI, Intrinsic::ID ID,
+  bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                MachineIRBuilder &MIRBuilder);
 
   /// Returns the single livein physical register Arg was lowered to, if
@@ -588,7 +588,7 @@ class IRTranslator : public MachineFunctionPass {
     return false;
   }
 
-  bool translateConvergenceControlIntrinsic(const CallBase &CI,
+  bool translateConvergenceControlIntrinsic(const CallInst &CI,
                                             Intrinsic::ID ID,
                                             MachineIRBuilder &MIRBuilder);
 
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index c690b4106900d..15a2370e5d8b8 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1205,7 +1205,7 @@ class SelectionDAG {
    * the tail call optimization decision. */
   SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                     SDValue Size, Align Alignment, bool isVol,
-                    bool AlwaysInline, const CallBase *CI,
+                    bool AlwaysInline, const CallInst *CI,
                     std::optional<bool> OverrideTailCall,
                     MachinePointerInfo DstPtrInfo,
                     MachinePointerInfo SrcPtrInfo,
@@ -1217,7 +1217,7 @@ class SelectionDAG {
    * the tail call optimization decision. */
   SDValue getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                      SDValue Size, Align Alignment, bool isVol,
-                     const CallBase *CI, std::optional<bool> OverrideTailCall,
+                     const CallInst *CI, std::optional<bool> OverrideTailCall,
                      MachinePointerInfo DstPtrInfo,
                      MachinePointerInfo SrcPtrInfo,
                      const AAMDNodes &AAInfo = AAMDNodes(),
@@ -1225,7 +1225,7 @@ class SelectionDAG {
 
   SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src,
                     SDValue Size, Align Alignment, bool isVol,
-                    bool AlwaysInline, const CallBase *CI,
+                    bool AlwaysInline, const CallInst *CI,
                     MachinePointerInfo DstPtrInfo,
                     const AAMDNodes &AAInfo = AAMDNodes());
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fd7a5b1442d3f..58ac87206b9a6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1238,7 +1238,7 @@ class TargetLoweringBase {
   /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
   /// true and store the intrinsic information into the IntrinsicInfo that was
   /// passed to the function.
-  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &,
+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                                   MachineFunction &,
                                   unsigned /*Intrinsic*/) const {
     return false;
@@ -5149,9 +5149,9 @@ class TargetLowering : public TargetLoweringBase {
                                               const AsmOperandInfo &OpInfo,
                                               SelectionDAG &DAG) const;
 
-  // Targets may override this function to collect operands from the CallBase
+  // Targets may override this function to collect operands from the CallInst
   // and for example, lower them into the SelectionDAG operands.
-  virtual void CollectTargetIntrinsicOperands(const CallBase &I,
+  virtual void CollectTargetIntrinsicOperands(const CallInst &I,
                                               SmallVectorImpl<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
 
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 9b6998397b944..779c88993b71c 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -38,7 +38,7 @@ namespace llvm {
 class DiagnosticPrinter;
 class DIFile;
 class DISubprogram;
-class CallBase;
+class CallInst;
 class Function;
 class Instruction;
 class InstructionCost;
@@ -1151,7 +1151,7 @@ class DiagnosticInfoSrcMgr : public DiagnosticInfo {
   }
 };
 
-void diagnoseDontCall(const CallBase &CI);
+void diagnoseDontCall(const CallInst &CI);
 
 class DiagnosticInfoDontCall : public DiagnosticInfo {
   StringRef CalleeName;
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 7a760781d2b96..93750d6e3845e 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -134,14 +134,8 @@ class IntrinsicInst : public CallInst {
       return CF->isIntrinsic();
     return false;
   }
-  static bool classof(const CallBase *I) {
-    if (const Function *CF = I->getCalledFunction())
-      return CF->isIntrinsic();
-    return false;
-  }
   static bool classof(const Value *V) {
-    return (isa<CallInst>(V) && classof(cast<CallInst>(V))) ||
-           (isa<CallBase>(V) && classof(cast<CallBase>(V)));
+    return isa<CallInst>(V) && classof(cast<CallInst>(V));
   }
 };
 
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index f950bb756fb2b..e7b9417de8c9f 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -712,7 +712,7 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
   return true;
 }
 
-bool llvm::funcReturnsFirstArgOfCall(const CallBase &CI) {
+bool llvm::funcReturnsFirstArgOfCall(const CallInst &CI) {
   const ReturnInst *Ret = dyn_cast<ReturnInst>(CI.getParent()->getTerminator());
   Value *RetVal = Ret ? Ret->getReturnValue() : nullptr;
   bool ReturnsFirstArg = false;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9323966d9ada6..0f698375ad6cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1694,7 +1694,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
   return true;
 }
 
-bool IRTranslator::translateMemFunc(const CallBase &CI,
+bool IRTranslator::translateMemFunc(const CallInst &CI,
                                     MachineIRBuilder &MIRBuilder,
                                     unsigned Opcode) {
   const Value *SrcPtr = CI.getArgOperand(1);
@@ -1785,7 +1785,7 @@ bool IRTranslator::translateMemFunc(const CallBase &CI,
   return true;
 }
 
-bool IRTranslator::translateTrap(const CallBase &CI,
+bool IRTranslator::translateTrap(const CallInst &CI,
                                  MachineIRBuilder &MIRBuilder,
                                  unsigned Opcode) {
   StringRef TrapFuncName =
@@ -1812,7 +1812,7 @@ bool IRTranslator::translateTrap(const CallBase &CI,
 }
 
 bool IRTranslator::translateVectorInterleave2Intrinsic(
-    const CallBase &CI, MachineIRBuilder &MIRBuilder) {
+    const CallInst &CI, MachineIRBuilder &MIRBuilder) {
   assert(CI.getIntrinsicID() == Intrinsic::vector_interleave2 &&
          "This function can only be called on the interleave2 intrinsic!");
   // Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
@@ -1828,7 +1828,7 @@ bool IRTranslator::translateVectorInterleave2Intrinsic(
 }
 
 bool IRTranslator::translateVectorDeinterleave2Intrinsic(
-    const CallBase &CI, MachineIRBuilder &MIRBuilder) {
+    const CallInst &CI, MachineIRBuilder &MIRBuilder) {
   assert(CI.getIntrinsicID() == Intrinsic::vector_deinterleave2 &&
          "This function can only be called on the deinterleave2 intrinsic!");
   // Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
@@ -1868,7 +1868,7 @@ void IRTranslator::getStackGuard(Register DstReg,
   MIB.setMemRefs({MemRef});
 }
 
-bool IRTranslator::translateOverflowIntrinsic(const CallBase &CI, unsigned Op,
+bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
   ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
   MIRBuilder.buildInstr(
@@ -1878,7 +1878,7 @@ bool IRTranslator::translateOverflowIntrinsic(const CallBase &CI, unsigned Op,
   return true;
 }
 
-bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallBase &CI,
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
                                                 MachineIRBuilder &MIRBuilder) {
   Register Dst = getOrCreateVReg(CI);
   Register Src0 = getOrCreateVReg(*CI.getOperand(0));
@@ -2023,7 +2023,7 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
   return Intrinsic::not_intrinsic;
 }
 
-bool IRTranslator::translateSimpleIntrinsic(const CallBase &CI,
+bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
                                             Intrinsic::ID ID,
                                             MachineIRBuilder &MIRBuilder) {
 
@@ -2145,7 +2145,7 @@ static unsigned getConvOpcode(Intrinsic::ID ID) {
 }
 
 bool IRTranslator::translateConvergenceControlIntrinsic(
-    const CallBase &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
+    const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
   MachineInstrBuilder MIB = MIRBuilder.buildInstr(getConvOpcode(ID));
   Register OutputReg = getOrCreateConvergenceTokenVReg(CI);
   MIB.addDef(OutputReg);
@@ -2161,7 +2161,7 @@ bool IRTranslator::translateConvergenceControlIntrinsic(
   return true;
 }
 
-bool IRTranslator::translateKnownIntrinsic(const CallBase &CI, Intrinsic::ID ID,
+bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
   if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
     if (ORE->enabled()) {
@@ -2756,7 +2756,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (containsBF16Type(U))
     return false;
 
-  const CallBase &CI = cast<CallBase>(U);
+  const CallInst &CI = cast<CallInst>(U);
   const Function *F = CI.getCalledFunction();
 
   // FIXME: support Windows dllimport function calls and calls through
@@ -3023,14 +3023,22 @@ bool IRTranslator::translateCallBr(const User &U,
   } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
     switch (I.getIntrinsicID()) {
     default:
-      return false;
+      report_fatal_error("Unsupported intrinsic for callbr");
     case Intrinsic::amdgcn_kill:
-      if (!translateCall(I, MIRBuilder))
+      if (I.getNumIndirectDests() != 1)
+        report_fatal_error(
+            "amdgcn.kill supports exactly one indirect destination");
+      CallInst *CI =
+          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+                           SmallVector<Value *, 1>(I.args()));
+      bool Success = translateCall(*CI, MIRBuilder);
+      CI->deleteValue();
+      if (!Success)
         return false;
       break;
     }
   } else {
-    return false;
+    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
   }
 
   // Retrieve successors.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7b75e43e0b08e..7ce4eebf685e1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8518,7 +8518,7 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
 
 SDValue SelectionDAG::getMemcpy(
     SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size,
-    Align Alignment, bool isVol, bool AlwaysInline, const CallBase *CI,
+    Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI,
     std::optional<bool> OverrideTailCall, MachinePointerInfo DstPtrInfo,
     MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo,
     BatchAAResults *BatchAA) {
@@ -8644,7 +8644,7 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl,
 
 SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                  SDValue Src, SDValue Size, Align Alignment,
-                                 bool isVol, const CallBase *CI,
+                                 bool isVol, const CallInst *CI,
                                  std::optional<bool> OverrideTailCall,
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo,
@@ -8762,7 +8762,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl,
 SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
                                 SDValue Src, SDValue Size, Align Alignment,
                                 bool isVol, bool AlwaysInline,
-                                const CallBase *CI,
+                                const CallInst *CI,
                                 MachinePointerInfo DstPtrInfo,
                                 const AAMDNodes &AAInfo) {
   // Check to see if we should lower the memset to stores first.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ebca27d79406e..c9501128cd593 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3392,7 +3392,14 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
     default:
       report_fatal_error("Unsupported intrinsic for callbr");
     case Intrinsic::amdgcn_kill:
-      visitCallBase(I);
+      if (I.getNumIndirectDests() != 1)
+        report_fatal_error(
+            "amdgcn.kill supports exactly one indirect destination");
+      CallInst *CI =
+          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+                           SmallVector<Value *, 1>(I.args()));
+      visitCall(*CI);
+      CI->deleteValue();
       break;
     }
   } else {
@@ -4751,7 +4758,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   DAG.setRoot(StoreNode);
 }
 
-void SelectionDAGBuilder::visitMaskedStore(const CallBase &I,
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
                                            bool IsCompressing) {
   SDLoc sdl = getCurSDLoc();
 
@@ -4882,7 +4889,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
   return true;
 }
 
-void SelectionDAGBuilder::visitMaskedScatter(const CallBase &I) {
+void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
   SDLoc sdl = getCurSDLoc();
 
   // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
@@ -4927,7 +4934,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallBase &I) {
   setValue(&I, Scatter);
 }
 
-void SelectionDAGBuilder::visitMaskedLoad(const CallBase &I, bool IsExpanding) {
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
   SDLoc sdl = getCurSDLoc();
 
   auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
@@ -4996,7 +5003,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallBase &I, bool IsExpanding) {
   setValue(&I, Res);
 }
 
-void SelectionDAGBuilder::visitMaskedGather(const CallBase &I) {
+void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
   SDLoc sdl = getCurSDLoc();
 
   // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
@@ -5226,7 +5233,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
 
 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
 /// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallBase &I,
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
                                                unsigned Intrinsic) {
   // Ignore the callsite's attributes. A specific call site may be marked with
   // readnone, but the lowering code will expect the chain based on the
@@ -6302,7 +6309,7 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue(
 }
 
 /// Lower the call to the specified intrinsic function.
-void SelectionDAGBuilder::visitConvergenceControl(const CallBase &I,
+void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
                                                   unsigned Intrinsic) {
   SDLoc sdl = getCurSDLoc();
   switch (Intrinsic) {
@@ -6322,7 +6329,7 @@ void SelectionDAGBuilder::visitConvergenceControl(const CallBase &I,
   }
 }
 
-void SelectionDAGBuilder::visitVectorHistogram(const CallBase &I,
+void SelectionDAGBuilder::visitVectorHistogram(const CallInst &I,
                                                unsigned IntrinsicID) {
   // For now, we're only lowering an 'add' histogram.
   // We can add others later, e.g. saturating adds, min/max.
@@ -6380,7 +6387,7 @@ void SelectionDAGBuilder::visitVectorHistogram(const CallBase &I,
   DAG.setRoot(Histogram);
 }
 
-void SelectionDAGBuilder::visitVectorExtractLastActive(const CallBase &I,
+void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
                                                        unsigned Intrinsic) {
   assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
          "Tried lowering invalid vector extract last");
@@ -6408,7 +6415,7 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallBase &I,
 }
 
 /// Lower the call to the specified intrinsic function.
-void SelectionDAGBuilder::visitIntrinsicCall(const CallBase &I,
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                                              unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDLoc sdl = getCurSDLoc();
@@ -9040,7 +9047,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallBase &I) {
+bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
   const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
   const Value *Size = I.getArgOperand(2);
   const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
@@ -9132,7 +9139,7 @@ bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallBase &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemChrCall(const CallBase &I) {
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
   const Value *Src = I.getArgOperand(0);
   const Value *Char = I.getArgOperand(1);
   const Value *Length = I.getArgOperand(2);
@@ -9156,7 +9163,7 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallBase &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitMemPCpyCall(const CallBase &I) {
+bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
   SDValue Dst = getValue(I.getArgOperand(0));
   SDValue Src = getValue(I.getArgOperand(1));
   SDValue Size = getValue(I.getArgOperand(2));
@@ -9195,7 +9202,7 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallBase &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrCpyCall(const CallBase &I, bool isStpcpy) {
+bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9218,7 +9225,7 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallBase &I, bool isStpcpy) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrCmpCall(const CallBase &I) {
+bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9241,7 +9248,7 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallBase &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrLenCall(const CallBase &I) {
+bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
   const Value *Arg0 = I.getArgOperand(0);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9262,7 +9269,7 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallBase &I) {
 /// normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitStrNLenCall(const CallBase &I) {
+bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
@@ -9284,7 +9291,7 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallBase &I) {
 /// false and it will be lowered like a normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitUnaryFloatCall(const CallBase &I,
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
                                               unsigned Opcode) {
   // We already checked this call's prototype; verify it doesn't modify errno.
   if (!I.onlyReadsMemory())
@@ -9304,7 +9311,7 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallBase &I,
 /// false, and it will be lowered like a normal call.
 /// The caller already checked that \p I calls the appropriate LibFunc with a
 /// correct prototype.
-bool SelectionDAGBuilder::visitBinaryFloatCall(const CallBase &I,
+bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
                                                unsigned Opcode) {
   // We already checked this call's prototype; verify it doesn't modify errno.
   if (!I.onlyReadsMemory())
@@ -9320,9 +9327,7 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallBase &I,
   return true;
 }
 
-void SelectionDAGBuilder::visitCall(const CallInst &I) { visitCallBase(I); }
-
-void SelectionDAGBuilder::visitCallBase(const CallBase &I) {
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
   // Handle inline assembly differently.
   if (I.isInlineAsm()) {
     visitInlineAsm(I);
@@ -10485,7 +10490,7 @@ void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
   setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
 }
 
-void SelectionDAGBuilder::visitVAStart(const CallBase &I) {
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
   DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getArgOperand(0)),
@@ -10507,14 +10512,14 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   setValue(&I, V);
 }
 
-void SelectionDAGBuilder::visitVAEnd(const CallBase &I) {
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
   DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getArgOperand(0)),
                           DAG.getSrcValue(I.getArgOperand(0))));
 }
 
-void SelectionDAGBuilder::visitVACopy(const CallBase &I) {
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
   DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
                           MVT::Other, getRoot(),
                           getValue(I.getArgOperand(0)),
@@ -10632,7 +10637,7 @@ static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
 }
 
 /// Lower llvm.experimental.stackmap.
-void SelectionDAGBuilder::visitStackmap(const CallBase &CI) {
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
   // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
   //                                  [live variables...])
 
@@ -10844,7 +10849,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
   FuncInfo.MF->getFrameInfo().setHasPatchPoint();
 }
 
-void SelectionDAGBuilder::visitVectorReduce(const CallBase &I,
+void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
                                             unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Op1 = getValue(I.getArgOperand(0));
@@ -12519,14 +12524,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   }
 }
 
-void SelectionDAGBuilder::visitStepVector(const CallBase &I) {
+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   auto DL = getCurSDLoc();
   EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
   setValue(&I, DAG.getStepVector(DL, ResultVT));
 }
 
-void SelectionDAGBuilder::visitVectorReverse(const CallBase &I) {
+void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
@@ -12549,7 +12554,7 @@ void SelectionDAGBuilder::visitVectorReverse(const CallBase &I) {
   setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
 }
 
-void SelectionDAGBuilder::visitVectorDeinterleave(const CallBase &I,
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I,
                                                   unsigned Factor) {
   auto DL = getCurSDLoc();
   SDValue InVec = getValue(I.getOperand(0));
@@ -12585,7 +12590,7 @@ void SelectionDAGBuilder::visitVectorDeinterleave(const CallBase &I,
   setValue(&I, Res);
 }
 
-void SelectionDAGBuilder::visitVectorInterleave(const CallBase &I,
+void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I,
                                                 unsigned Factor) {
   auto DL = getCurSDLoc();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -12639,7 +12644,7 @@ void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
                            DAG.getVTList(ValueVTs), Values));
 }
 
-void SelectionDAGBuilder::visitVectorSplice(const CallBase &I) {
+void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
@@ -12713,7 +12718,7 @@ static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
 //   setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
 // otherwise we will end up with copies of virtregs only valid along direct
 // edges.
-void SelectionDAGBuilder::visitCallBrLandingPad(const CallBase &I) {
+void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
   SmallVector<EVT, 8> ResultVTs;
   SmallVector<SDValue, 8> ResultValues;
   const auto *CBR =
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 483e0b9d2db6c..35c15bc269d4b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -483,7 +483,7 @@ class SelectionDAGBuilder {
   void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
                                     const BasicBlock *EHPadBB);
 
-  void LowerDeoptimizeCall(const CallBase *CI);
+  void LowerDeoptimizeCall(const CallInst *CI);
   void LowerDeoptimizingReturn();
 
   void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
@@ -537,7 +537,7 @@ class SelectionDAGBuilder {
   // These all get lowered before this pass.
   void visitInvoke(const InvokeInst &I);
   void visitCallBr(const CallBrInst &I);
-  void visitCallBrLandingPad(const CallBase &I);
+  void visitCallBrLandingPad(const CallInst &I);
   void visitResume(const ResumeInst &I);
 
   void visitUnary(const User &I, unsigned Opcode);
@@ -594,25 +594,24 @@ class SelectionDAGBuilder {
   void visitAlloca(const AllocaInst &I);
   void visitLoad(const LoadInst &I);
   void visitStore(const StoreInst &I);
-  void visitMaskedLoad(const CallBase &I, bool IsExpanding = false);
-  void visitMaskedStore(const CallBase &I, bool IsCompressing = false);
-  void visitMaskedGather(const CallBase &I);
-  void visitMaskedScatter(const CallBase &I);
+  void visitMaskedLoad(const CallInst &I, bool IsExpanding = false);
+  void visitMaskedStore(const CallInst &I, bool IsCompressing = false);
+  void visitMaskedGather(const CallInst &I);
+  void visitMaskedScatter(const CallInst &I);
   void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
   void visitAtomicRMW(const AtomicRMWInst &I);
   void visitFence(const FenceInst &I);
   void visitPHI(const PHINode &I);
   void visitCall(const CallInst &I);
-  void visitCallBase(const CallBase &I);
-  bool visitMemCmpBCmpCall(const CallBase &I);
-  bool visitMemPCpyCall(const CallBase &I);
-  bool visitMemChrCall(const CallBase &I);
-  bool visitStrCpyCall(const CallBase &I, bool isStpcpy);
-  bool visitStrCmpCall(const CallBase &I);
-  bool visitStrLenCall(const CallBase &I);
-  bool visitStrNLenCall(const CallBase &I);
-  bool visitUnaryFloatCall(const CallBase &I, unsigned Opcode);
-  bool visitBinaryFloatCall(const CallBase &I, unsigned Opcode);
+  bool visitMemCmpBCmpCall(const CallInst &I);
+  bool visitMemPCpyCall(const CallInst &I);
+  bool visitMemChrCall(const CallInst &I);
+  bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
+  bool visitStrCmpCall(const CallInst &I);
+  bool visitStrLenCall(const CallInst &I);
+  bool visitStrNLenCall(const CallInst &I);
+  bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+  bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
   void visitAtomicLoad(const LoadInst &I);
   void visitAtomicStore(const StoreInst &I);
   void visitLoadFromSwiftError(const LoadInst &I);
@@ -625,12 +624,12 @@ class SelectionDAGBuilder {
   bool visitEntryValueDbgValue(ArrayRef<const Value *> Values,
                                DILocalVariable *Variable, DIExpression *Expr,
                                DebugLoc DbgLoc);
-  void visitIntrinsicCall(const CallBase &I, unsigned Intrinsic);
-  void visitTargetIntrinsic(const CallBase &I, unsigned Intrinsic);
+  void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
   void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
-  void visitConvergenceControl(const CallBase &I, unsigned Intrinsic);
-  void visitVectorHistogram(const CallBase &I, unsigned IntrinsicID);
-  void visitVectorExtractLastActive(const CallBase &I, unsigned Intrinsic);
+  void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
+  void visitVectorHistogram(const CallInst &I, unsigned IntrinsicID);
+  void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
   void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
                    const SmallVectorImpl<SDValue> &OpValues);
   void visitVPStore(const VPIntrinsic &VPIntrin,
@@ -646,23 +645,23 @@ class SelectionDAGBuilder {
   void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
   void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
 
-  void visitVAStart(const CallBase &I);
+  void visitVAStart(const CallInst &I);
   void visitVAArg(const VAArgInst &I);
-  void visitVAEnd(const CallBase &I);
-  void visitVACopy(const CallBase &I);
-  void visitStackmap(const CallBase &I);
+  void visitVAEnd(const CallInst &I);
+  void visitVACopy(const CallInst &I);
+  void visitStackmap(const CallInst &I);
   void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr);
 
   // These two are implemented in StatepointLowering.cpp
   void visitGCRelocate(const GCRelocateInst &Relocate);
   void visitGCResult(const GCResultInst &I);
 
-  void visitVectorReduce(const CallBase &I, unsigned Intrinsic);
-  void visitVectorReverse(const CallBase &I);
-  void visitVectorSplice(const CallBase &I);
-  void visitVectorInterleave(const CallBase &I, unsigned Factor);
-  void visitVectorDeinterleave(const CallBase &I, unsigned Factor);
-  void visitStepVector(const CallBase &I);
+  void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+  void visitVectorReverse(const CallInst &I);
+  void visitVectorSplice(const CallInst &I);
+  void visitVectorInterleave(const CallInst &I, unsigned Factor);
+  void visitVectorDeinterleave(const CallInst &I, unsigned Factor);
+  void visitStepVector(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5227fd6a60e7f..80aeefe8e068a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1303,7 +1303,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
   setValue(&Relocate, SD);
 }
 
-void SelectionDAGBuilder::LowerDeoptimizeCall(const CallBase *CI) {
+void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) {
   const auto &TLI = DAG.getTargetLoweringInfo();
   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE),
                                          TLI.getPointerTy(DAG.getDataLayout()));
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 72e62e17a327a..f1649a3903fac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5655,7 +5655,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 }
 
 void TargetLowering::CollectTargetIntrinsicOperands(
-    const CallBase &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
 }
 
 std::pair<unsigned, const TargetRegisterClass *>
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index 83b2f71c28185..0e526ada4b405 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -456,7 +456,7 @@ void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const {
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
 
-void llvm::diagnoseDontCall(const CallBase &CI) {
+void llvm::diagnoseDontCall(const CallInst &CI) {
   const auto *F =
       dyn_cast<Function>(CI.getCalledOperand()->stripPointerCasts());
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3f628dbf49abb..1c8e3afdfd718 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16373,7 +16373,7 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
 template <unsigned NumVecs>
 static bool
 setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
-              AArch64TargetLowering::IntrinsicInfo &Info, const CallBase &CI) {
+              AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
   Info.opc = ISD::INTRINSIC_VOID;
   // Retrieve EC from first vector argument.
   const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
@@ -16398,7 +16398,7 @@ setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                               const CallBase &I,
+                                               const CallInst &I,
                                                MachineFunction &MF,
                                                unsigned Intrinsic) const {
   auto &DL = I.getDataLayout();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e1fadc641ebdb..bc0c3a832bb28 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -701,7 +701,7 @@ class AArch64TargetLowering : public TargetLowering {
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *MBB) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b42861147eb75..b0c18715ef810 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1210,7 +1210,7 @@ MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
 }
 
 bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                          const CallBase &CI,
+                                          const CallInst &CI,
                                           MachineFunction &MF,
                                           unsigned IntrID) const {
   Info.flags = MachineMemOperand::MONone;
@@ -1496,7 +1496,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 }
 
 void SITargetLowering::CollectTargetIntrinsicOperands(
-    const CallBase &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
   switch (cast<IntrinsicInst>(I).getIntrinsicID()) {
   case Intrinsic::amdgcn_addrspacecast_nonnull: {
     // The DAG's ValueType loses the addrspaces.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 6d7f2d16a458e..8e4717a3f64ab 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -310,11 +310,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
   MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &, const CallBase &,
+  bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                           MachineFunction &MF,
                           unsigned IntrinsicID) const override;
 
-  void CollectTargetIntrinsicOperands(const CallBase &I,
+  void CollectTargetIntrinsicOperands(const CallInst &I,
                                       SmallVectorImpl<SDValue> &Ops,
                                       SelectionDAG &DAG) const override;
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f72070521b940..d2f9ec982ae01 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20953,7 +20953,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallBase &I,
+                                           const CallInst &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 883dbaa49d2ed..9fad056edd3f1 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -616,7 +616,8 @@ class VectorType;
     bool isFPImmLegal(const APFloat &Imm, EVT VT,
                       bool ForCodeSize = false) const override;
 
-    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                            const CallInst &I,
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index ab6f0d5588e8d..1710488e4e292 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2079,7 +2079,7 @@ static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
 /// true and store the intrinsic information into the IntrinsicInfo that was
 /// passed to the function.
 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                               const CallBase &I,
+                                               const CallInst &I,
                                                MachineFunction &MF,
                                                unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 2047c905fefd0..4df88b3a8abd7 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -142,7 +142,7 @@ class HexagonTargetLowering : public TargetLowering {
       const SmallVectorImpl<SDValue> &OutVals,
       const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 46c416255fca4..900775eedfa7b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6043,7 +6043,7 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
 }
 
 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                                 const CallBase &I,
+                                                 const CallInst &I,
                                                  MachineFunction &MF,
                                                  unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9021c09b6bc25..002fad0e20759 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -212,7 +212,7 @@ class LoongArchTargetLowering : public TargetLowering {
                                           Value *NewVal, Value *Mask,
                                           AtomicOrdering Ord) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a223190bf9f11..06e221777b7ea 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3632,10 +3632,9 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint(
 // because we need the information that is only available in the "Value" type
 // of destination
 // pointer. In particular, the address space information.
-bool NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                             const CallBase &I,
-                                             MachineFunction &MF,
-                                             unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+    IntrinsicInfo &Info, const CallInst &I,
+    MachineFunction &MF, unsigned Intrinsic) const {
   switch (Intrinsic) {
   default:
     return false;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 74ad5ef704c0d..39470be254efa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -118,7 +118,7 @@ class NVPTXTargetLowering : public TargetLowering {
 
   const char *getTargetNodeName(unsigned Opcode) const override;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0066ac5d41b8c..ab78f33f5a630 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17478,8 +17478,9 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }
 
-void PPCTargetLowering::CollectTargetIntrinsicOperands(
-    const CallBase &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
+void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
+                                              SmallVectorImpl<SDValue> &Ops,
+                                              SelectionDAG &DAG) const {
   if (I.getNumOperands() <= 1)
     return;
   if (!isa<ConstantSDNode>(Ops[1].getNode()))
@@ -17667,7 +17668,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 }
 
 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallBase &I,
+                                           const CallInst &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 64ef67923ef0e..1f22aa16a89be 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1008,9 +1008,9 @@ namespace llvm {
       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
 
-    void CollectTargetIntrinsicOperands(const CallBase &I,
-                                        SmallVectorImpl<SDValue> &Ops,
-                                        SelectionDAG &DAG) const override;
+    void CollectTargetIntrinsicOperands(const CallInst &I,
+                                 SmallVectorImpl<SDValue> &Ops,
+                                 SelectionDAG &DAG) const override;
 
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
@@ -1069,7 +1069,8 @@ namespace llvm {
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
-    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                            const CallInst &I,
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 00cd0c5c87163..5b5dca4b541df 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1644,7 +1644,7 @@ bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
 }
 
 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                             const CallBase &I,
+                                             const CallInst &I,
                                              MachineFunction &MF,
                                              unsigned Intrinsic) const {
   auto &DL = I.getDataLayout();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index cc401a052c7f2..f4d6cd86397a4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -517,7 +517,7 @@ class RISCVTargetLowering : public TargetLowering {
 
   const RISCVSubtarget &getSubtarget() const { return Subtarget; }
 
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 68bdab2da0e33..c347dde89256f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -55,7 +55,7 @@ MVT SPIRVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
 }
 
 bool SPIRVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                             const CallBase &I,
+                                             const CallInst &I,
                                              MachineFunction &MF,
                                              unsigned Intrinsic) const {
   unsigned AlignIdx = 3;
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
index 9e23ae3587fae..eb78299b72f04 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
@@ -49,7 +49,7 @@ class SPIRVTargetLowering : public TargetLowering {
                                          EVT VT) const override;
   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                     EVT VT) const override;
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 0f8244dc50107..9ae46e709d823 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -1033,7 +1033,7 @@ EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
 }
 
 bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                                   const CallBase &I,
+                                                   const CallInst &I,
                                                    MachineFunction &MF,
                                                    unsigned Intrinsic) const {
   switch (Intrinsic) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index a077fa6cbf7ea..90d31e38a7076 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -72,7 +72,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a5c299d3e682e..b128a6dadbbb6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3101,7 +3101,7 @@ static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
 }
 
 bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                           const CallBase &I,
+                                           const CallInst &I,
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   Info.flags = MachineMemOperand::MONone;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 1c776db401562..4a2b35e9efe7c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1467,7 +1467,7 @@ namespace llvm {
     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
     /// true and stores the intrinsic information into the IntrinsicInfo that was
     /// passed to the function.
-    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
+    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                             MachineFunction &MF,
                             unsigned Intrinsic) const override;
 

>From c373c2eb3ef4c8513b490bd3ef34d811aaa7a888 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Sun, 6 Apr 2025 14:58:41 -0500
Subject: [PATCH 6/9] abstract parts of call code; disallow operand bundles for
 callbr in verifier

---
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    |   5 +
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 158 +++++++-------
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 192 +++++++++++-------
 .../SelectionDAG/SelectionDAGBuilder.h        |  14 +-
 llvm/lib/IR/Verifier.cpp                      |   4 +-
 llvm/test/Verifier/callbr.ll                  |   8 +
 6 files changed, 237 insertions(+), 144 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 6fd05c8fddd5f..ba2aeac8dffe1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -297,6 +297,10 @@ class IRTranslator : public MachineFunctionPass {
   /// \pre \p U is a call instruction.
   bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
 
+  bool translateTargetIntrinsic(
+      const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+      TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+
   /// When an invoke or a cleanupret unwinds to the next EH pad, there are
   /// many places it could ultimately go. In the IR, we have a single unwind
   /// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -313,6 +317,7 @@ class IRTranslator : public MachineFunctionPass {
   bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
 
   bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
+  bool translateCallBrIntrinsic(const CallBrInst &I, MachineIRBuilder &MIRBuilder);
 
   bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 0f698375ad6cf..fc5cb21d094ad 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2752,59 +2752,27 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
   return Success;
 }
 
-bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
-  if (containsBF16Type(U))
-    return false;
-
-  const CallInst &CI = cast<CallInst>(U);
-  const Function *F = CI.getCalledFunction();
-
-  // FIXME: support Windows dllimport function calls and calls through
-  // weak symbols.
-  if (F && (F->hasDLLImportStorageClass() ||
-            (MF->getTarget().getTargetTriple().isOSWindows() &&
-             F->hasExternalWeakLinkage())))
-    return false;
-
-  // FIXME: support control flow guard targets.
-  if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
-    return false;
-
-  // FIXME: support statepoints and related.
-  if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
-    return false;
-
-  if (CI.isInlineAsm())
-    return translateInlineAsm(CI, MIRBuilder);
-
-  diagnoseDontCall(CI);
-
-  Intrinsic::ID ID = Intrinsic::not_intrinsic;
-  if (F && F->isIntrinsic())
-    ID = F->getIntrinsicID();
-
-  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
-    return translateCallBase(CI, MIRBuilder);
-
-  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
-
-  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
-    return true;
-
+/// Translate a call or callbr to a target intrinsic.
+/// If TLI->getTgtMemIntrinsic() returned true for this call,
+/// TgtMemIntrinsicInfo points to the IntrinsicInfo object it populated;
+/// otherwise, TgtMemIntrinsicInfo is null.
+bool IRTranslator::translateTargetIntrinsic(
+    const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+    TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
   ArrayRef<Register> ResultRegs;
-  if (!CI.getType()->isVoidTy())
-    ResultRegs = getOrCreateVRegs(CI);
+  if (!CB.getType()->isVoidTy())
+    ResultRegs = getOrCreateVRegs(CB);
 
   // Ignore the callsite attributes. Backend code is most likely not expecting
   // an intrinsic to sometimes have side effects and sometimes not.
   MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
-  if (isa<FPMathOperator>(CI))
-    MIB->copyIRFlags(CI);
+  if (isa<FPMathOperator>(CB))
+    MIB->copyIRFlags(CB);
 
-  for (const auto &Arg : enumerate(CI.args())) {
+  for (const auto &Arg : enumerate(CB.args())) {
     // If this is required to be an immediate, don't materialize it in a
     // register.
-    if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+    if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
         // imm arguments are more convenient than cimm (and realistically
         // probably sufficient), so use them.
@@ -2833,28 +2801,30 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   }
 
   // Add a MachineMemOperand if it is a target mem intrinsic.
-  TargetLowering::IntrinsicInfo Info;
-  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
-  if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
-    Align Alignment = Info.align.value_or(
-        DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
-    LLT MemTy = Info.memVT.isSimple()
-                    ? getLLTForMVT(Info.memVT.getSimpleVT())
-                    : LLT::scalar(Info.memVT.getStoreSizeInBits());
+  if (TgtMemIntrinsicInfo) {
+    const Function *F = CB.getCalledFunction();
+
+    Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign(
+        TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext())));
+    LLT MemTy =
+        TgtMemIntrinsicInfo->memVT.isSimple()
+            ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT())
+            : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits());
 
     // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
     //       didn't yield anything useful.
     MachinePointerInfo MPI;
-    if (Info.ptrVal)
-      MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
-    else if (Info.fallbackAddressSpace)
-      MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
-    MIB.addMemOperand(
-        MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+    if (TgtMemIntrinsicInfo->ptrVal)
+      MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal,
+                               TgtMemIntrinsicInfo->offset);
+    else if (TgtMemIntrinsicInfo->fallbackAddressSpace)
+      MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace);
+    MIB.addMemOperand(MF->getMachineMemOperand(
+        MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata()));
   }
 
-  if (CI.isConvergent()) {
-    if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+  if (CB.isConvergent()) {
+    if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
       auto *Token = Bundle->Inputs[0].get();
       Register TokenReg = getOrCreateVReg(*Token);
       MIB.addUse(TokenReg, RegState::Implicit);
@@ -2864,6 +2834,53 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   return true;
 }
 
+bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
+  if (containsBF16Type(U))
+    return false;
+
+  const CallInst &CI = cast<CallInst>(U);
+  const Function *F = CI.getCalledFunction();
+
+  // FIXME: support Windows dllimport function calls and calls through
+  // weak symbols.
+  if (F && (F->hasDLLImportStorageClass() ||
+            (MF->getTarget().getTargetTriple().isOSWindows() &&
+             F->hasExternalWeakLinkage())))
+    return false;
+
+  // FIXME: support control flow guard targets.
+  if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
+    return false;
+
+  // FIXME: support statepoints and related.
+  if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
+    return false;
+
+  if (CI.isInlineAsm())
+    return translateInlineAsm(CI, MIRBuilder);
+
+  diagnoseDontCall(CI);
+
+  Intrinsic::ID ID = Intrinsic::not_intrinsic;
+  if (F && F->isIntrinsic())
+    ID = F->getIntrinsicID();
+
+  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
+    return translateCallBase(CI, MIRBuilder);
+
+  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
+
+  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
+    return true;
+
+  TargetLowering::IntrinsicInfo Info;
+  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
+  bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+  return translateTargetIntrinsic(CI, ID, MIRBuilder,
+                                  IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
 bool IRTranslator::findUnwindDestinations(
     const BasicBlock *EHPadBB,
     BranchProbability Prob,
@@ -3007,15 +3024,16 @@ bool IRTranslator::translateInvoke(const User &U,
   return true;
 }
 
+/// The intrinsics currently supported by callbr are implicit control flow
+/// intrinsics such as amdgcn.kill.
 bool IRTranslator::translateCallBr(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
+  if (containsBF16Type(U))
+    return false; // see translateCall
+
   const CallBrInst &I = cast<CallBrInst>(U);
   MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
 
-  // TODO: operand bundles (see SelDAG implementation of callbr)?
-  assert(!I.hasOperandBundles() &&
-         "Cannot lower callbrs with operand bundles yet");
-
   if (I.isInlineAsm()) {
     // FIXME: inline asm not yet supported
     if (!translateInlineAsm(I, MIRBuilder))
@@ -3025,15 +3043,7 @@ bool IRTranslator::translateCallBr(const User &U,
     default:
       report_fatal_error("Unsupported intrinsic for callbr");
     case Intrinsic::amdgcn_kill:
-      if (I.getNumIndirectDests() != 1)
-        report_fatal_error(
-            "amdgcn.kill supportes exactly one indirect destination");
-      CallInst *CI =
-          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
-                           SmallVector<Value *, 1>(I.args()));
-      bool Success = translateCall(*CI, MIRBuilder);
-      CI->deleteValue();
-      if (!Success)
+      if (!translateTargetIntrinsic(I, Intrinsic::amdgcn_kill, MIRBuilder))
         return false;
       break;
     }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c9501128cd593..8e1c097ec0a5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3376,15 +3376,28 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
                           DAG.getBasicBlock(Return)));
 }
 
+/// The intrinsics currently supported by callbr are implicit control flow
+/// intrinsics such as amdgcn.kill.
+/// - they should be called (no "dontcall-" attributes)
+/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
+/// - they do not need custom argument handling (no TLI.CollectTargetIntrinsicOperands())
+void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
+  auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+  SmallVector<SDValue, 8> Ops =
+      getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
+  SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
+
+  // Create the node.
+  SDValue Result = getTargetNonMemIntrinsicNode(I, HasChain, Ops, VTs);
+  Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
+
+  setValue(&I, Result);
+}
+
 void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
   MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
 
-  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
-  // have to do anything here to lower funclet bundles.
-  assert(!I.hasOperandBundlesOtherThan(
-             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
-         "Cannot lower callbrs with arbitrary operand bundles yet!");
-
   if (I.isInlineAsm()) {
     visitInlineAsm(I);
   } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
@@ -3392,14 +3405,7 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
     default:
       report_fatal_error("Unsupported intrinsic for callbr");
     case Intrinsic::amdgcn_kill:
-      if (I.getNumIndirectDests() != 1)
-        report_fatal_error(
-            "amdgcn.kill supportes exactly one indirect destination");
-      CallInst *CI =
-          CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
-                           SmallVector<Value *, 1>(I.args()));
-      visitCall(*CI);
-      CI->deleteValue();
+      visitCallBrIntrinsic(I);
       break;
     }
   } else {
@@ -5231,18 +5237,25 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   DAG.setRoot(OutChain);
 }
 
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
-                                               unsigned Intrinsic) {
-  // Ignore the callsite's attributes. A specific call site may be marked with
-  // readnone, but the lowering code will expect the chain based on the
-  // definition.
+/// Check if this intrinsic call depends on the chain (1st return value)
+/// and if it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool> SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase& I) {
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
   bool OnlyLoad =
       HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
 
+  return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+    const CallBase &I, bool HasChain, bool OnlyLoad,
+    TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   // Build the operand list.
   SmallVector<SDValue, 8> Ops;
   if (HasChain) {  // If this intrinsic has side-effects, chainify it.
@@ -5254,17 +5267,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     }
   }
 
-  // Info is set by getTgtMemIntrinsic
-  TargetLowering::IntrinsicInfo Info;
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
-                                               DAG.getMachineFunction(),
-                                               Intrinsic);
-
   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
-  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
-      Info.opc == ISD::INTRINSIC_W_CHAIN)
-    Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+  if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+      TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+    Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
                                         TLI.getPointerTy(DAG.getDataLayout())));
 
   // Add all operands of the call to the operand list.
@@ -5287,13 +5293,96 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     }
   }
 
+  if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+    auto *Token = Bundle->Inputs[0].get();
+    SDValue ConvControlToken = getValue(Token);
+    assert(Ops.back().getValueType() != MVT::Glue &&
+           "Did not expected another glue node here.");
+    ConvControlToken =
+        DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+    Ops.push_back(ConvControlToken);
+  }
+
+  return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+                                                       bool HasChain) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
 
   if (HasChain)
     ValueVTs.push_back(MVT::Other);
 
-  SDVTList VTs = DAG.getVTList(ValueVTs);
+  return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch touch memory.
+SDValue
+SelectionDAGBuilder::getTargetNonMemIntrinsicNode(const CallBase &I, bool HasChain,
+                                            SmallVector<SDValue, 8> &Ops,
+                                            SDVTList &VTs) {
+  SDValue Result;
+
+  if (!HasChain) {
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+  } else if (!I.getType()->isVoidTy()) {
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+  } else {
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+  }
+
+  return Result;
+}
+
+/// Set root, convert return type if necessary and check alignment.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+                                                      bool HasChain,
+                                                      bool OnlyLoad,
+                                                      SDValue Result) {
+  if (HasChain) {
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+
+  if (I.getType()->isVoidTy())
+    return Result;
+
+  if (!isa<VectorType>(I.getType()))
+    Result = lowerRangeToAssertZExt(DAG, I, Result);
+
+  MaybeAlign Alignment = I.getRetAlign();
+
+  // Insert `assertalign` node if there's an alignment.
+  if (InsertAssertAlign && Alignment) {
+    Result =
+        DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+  }
+
+  return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+  // Info is set by getTgtMemIntrinsic
+  TargetLowering::IntrinsicInfo Info;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  bool IsTgtMemIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+                                               DAG.getMachineFunction(),
+                                               Intrinsic);
+
+  SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+      I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+  SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
 
   // Propagate fast-math-flags from IR to node(s).
   SDNodeFlags Flags;
@@ -5304,19 +5393,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // Create the node.
   SDValue Result;
 
-  if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
-    auto *Token = Bundle->Inputs[0].get();
-    SDValue ConvControlToken = getValue(Token);
-    assert(Ops.back().getValueType() != MVT::Glue &&
-           "Did not expected another glue node here.");
-    ConvControlToken =
-        DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
-    Ops.push_back(ConvControlToken);
-  }
-
   // In some cases, custom collection of operands from CallInst I may be needed.
   TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
-  if (IsTgtIntrinsic) {
+  if (IsTgtMemIntrinsic) {
     // This is target intrinsic that touches memory
     //
     // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5329,34 +5408,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops,
                                      Info.memVT, MPI, Info.align, Info.flags,
                                      Info.size, I.getAAMetadata());
-  } else if (!HasChain) {
-    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
-  } else if (!I.getType()->isVoidTy()) {
-    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
   } else {
-    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+    Result = getTargetNonMemIntrinsicNode(I, HasChain, Ops, VTs);
   }
 
-  if (HasChain) {
-    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
-    if (OnlyLoad)
-      PendingLoads.push_back(Chain);
-    else
-      DAG.setRoot(Chain);
-  }
-
-  if (!I.getType()->isVoidTy()) {
-    if (!isa<VectorType>(I.getType()))
-      Result = lowerRangeToAssertZExt(DAG, I, Result);
-
-    MaybeAlign Alignment = I.getRetAlign();
-
-    // Insert `assertalign` node if there's an alignment.
-    if (InsertAssertAlign && Alignment) {
-      Result =
-          DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
-    }
-  }
+  Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
 
   setValue(&I, Result);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 35c15bc269d4b..71f67a7f1eff6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -536,10 +536,12 @@ class SelectionDAGBuilder {
 private:
   // These all get lowered before this pass.
   void visitInvoke(const InvokeInst &I);
-  void visitCallBr(const CallBrInst &I);
   void visitCallBrLandingPad(const CallInst &I);
   void visitResume(const ResumeInst &I);
 
+  void visitCallBr(const CallBrInst &I);
+  void visitCallBrIntrinsic(const CallBrInst &I);
+
   void visitUnary(const User &I, unsigned Opcode);
   void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
 
@@ -709,6 +711,16 @@ class SelectionDAGBuilder {
                        MCSymbol *&BeginLabel);
   SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
                      const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+  std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase& I);
+  SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+      const CallBase &I, bool HasChain, bool OnlyLoad,
+      TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+  SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+  SDValue getTargetNonMemIntrinsicNode(const CallBase &I, bool HasChain,
+                                 SmallVector<SDValue, 8> &Ops, SDVTList &VTs);
+  SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+                                   bool OnlyLoad, SDValue Result);
 };
 
 /// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index fbf6e087177c6..021550261c07a 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3250,6 +3250,9 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
 
 void Verifier::visitCallBrInst(CallBrInst &CBI) {
   if (!CBI.isInlineAsm()) {
+    Check(CBI.getCalledFunction(), "Callbr: indirect function / invalid signature");
+    Check(!CBI.hasOperandBundles(), "Callbr currently doesn't support operand bundles");
+
     switch (CBI.getIntrinsicID()) {
     case Intrinsic::amdgcn_kill: {
       Check(CBI.getNumIndirectDests() == 1,
@@ -3259,7 +3262,6 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
       Check(Unreachable || (Call && Call->getIntrinsicID() ==
                                         Intrinsic::amdgcn_unreachable),
             "Callbr amdgcn_kill indirect dest needs to be unreachable");
-      visitIntrinsicCall(Intrinsic::amdgcn_kill, CBI);
       break;
     }
     default:
diff --git a/llvm/test/Verifier/callbr.ll b/llvm/test/Verifier/callbr.ll
index 8e125e723e6fc..29bd3397b8980 100644
--- a/llvm/test/Verifier/callbr.ll
+++ b/llvm/test/Verifier/callbr.ll
@@ -159,3 +159,11 @@ define void @test_callbr_intrinsic_unsupported() {
 cont:
   ret void
 }
+
+; CHECK-NEXT: Callbr: indirect function / invalid signature
+define void @test_callbr_intrinsic_wrong_signature(ptr %ptr) {
+  %func = load ptr, ptr %ptr, align 8
+  callbr void %func() to label %cont []
+cont:
+  ret void
+}

>From 7579a657be50c3135cd5ff6e169369b7f4a6b3f6 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Sun, 6 Apr 2025 14:59:29 -0500
Subject: [PATCH 7/9] fix formatting

---
 .../llvm/CodeGen/GlobalISel/IRTranslator.h    |  3 ++-
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 24 +++++++++----------
 .../SelectionDAG/SelectionDAGBuilder.h        |  5 ++--
 llvm/lib/IR/Verifier.cpp                      |  6 +++--
 4 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index ba2aeac8dffe1..941750510f1e1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -317,7 +317,8 @@ class IRTranslator : public MachineFunctionPass {
   bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
 
   bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
-  bool translateCallBrIntrinsic(const CallBrInst &I, MachineIRBuilder &MIRBuilder);
+  bool translateCallBrIntrinsic(const CallBrInst &I,
+                                MachineIRBuilder &MIRBuilder);
 
   bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8e1c097ec0a5e..d5c7f315d6aff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3380,7 +3380,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
 /// intrinsics such as amdgcn.kill.
 /// - they should be called (no "dontcall-" attributes)
 /// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
-/// - they do not need custom argument handling (no TLI.CollectTargetIntrinsicOperands())
+/// - they do not need custom argument handling (no
+/// TLI.CollectTargetIntrinsicOperands())
 void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
   auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
 
@@ -5242,7 +5243,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
 /// Ignore the callsite's attributes. A specific call site may be marked with
 /// readnone, but the lowering code will expect the chain based on the
 /// definition.
-std::pair<bool, bool> SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase& I) {
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
   bool OnlyLoad =
@@ -5319,11 +5321,11 @@ SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
   return DAG.getVTList(ValueVTs);
 }
 
-/// Get an INTRINSIC node for a target intrinsic which does not touch touch memory.
-SDValue
-SelectionDAGBuilder::getTargetNonMemIntrinsicNode(const CallBase &I, bool HasChain,
-                                            SmallVector<SDValue, 8> &Ops,
-                                            SDVTList &VTs) {
+/// Get an INTRINSIC node for a target intrinsic which does not touch
+/// memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+    const CallBase &I, bool HasChain, SmallVector<SDValue, 8> &Ops,
+    SDVTList &VTs) {
   SDValue Result;
 
   if (!HasChain) {
@@ -5360,8 +5362,7 @@ SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
 
   // Insert `assertalign` node if there's an alignment.
   if (InsertAssertAlign && Alignment) {
-    Result =
-        DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+    Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
   }
 
   return Result;
@@ -5376,9 +5377,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // Info is set by getTgtMemIntrinsic
   TargetLowering::IntrinsicInfo Info;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  bool IsTgtMemIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
-                                               DAG.getMachineFunction(),
-                                               Intrinsic);
+  bool IsTgtMemIntrinsic =
+      TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
 
   SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
       I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 71f67a7f1eff6..c1cf2c4035103 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -712,13 +712,14 @@ class SelectionDAGBuilder {
   SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
                      const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
 
-  std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase& I);
+  std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
   SmallVector<SDValue, 8> getTargetIntrinsicOperands(
       const CallBase &I, bool HasChain, bool OnlyLoad,
       TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
   SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
   SDValue getTargetNonMemIntrinsicNode(const CallBase &I, bool HasChain,
-                                 SmallVector<SDValue, 8> &Ops, SDVTList &VTs);
+                                       SmallVector<SDValue, 8> &Ops,
+                                       SDVTList &VTs);
   SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
                                    bool OnlyLoad, SDValue Result);
 };
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 021550261c07a..f61bd18e7b2ed 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3250,8 +3250,10 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
 
 void Verifier::visitCallBrInst(CallBrInst &CBI) {
   if (!CBI.isInlineAsm()) {
-    Check(CBI.getCalledFunction(), "Callbr: indirect function / invalid signature");
-    Check(!CBI.hasOperandBundles(), "Callbr currently doesn't support operand bundles");
+    Check(CBI.getCalledFunction(),
+          "Callbr: indirect function / invalid signature");
+    Check(!CBI.hasOperandBundles(),
+          "Callbr currently doesn't support operand bundles");
 
     switch (CBI.getIntrinsicID()) {
     case Intrinsic::amdgcn_kill: {

>From 123f7eb084e0f1e3602814f0e2dd20bced6041e3 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 9 Apr 2025 09:53:45 -0500
Subject: [PATCH 8/9] add callbr test with different succ order

---
 llvm/test/CodeGen/AMDGPU/callbr.ll | 50 ++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
index 33c75f6cf5aab..52856b872ee2a 100644
--- a/llvm/test/CodeGen/AMDGPU/callbr.ll
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -50,3 +50,53 @@ cont:
   store i32 %a, ptr %dst, align 4
   ret void
 }
+
+; SELDAG-LABEL: test_kill_block_order:
+; SELDAG-NEXT:  ; %bb.0:
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      flat_load_dword v0, v[0:1]
+; SELDAG-NEXT:      v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT:      v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT:      s_mov_b64 s[4:5], exec
+; SELDAG-NEXT:      s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT:      s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT:      s_cbranch_scc0 .LBB1_2
+; SELDAG-NEXT:  ; %bb.1:
+; SELDAG-NEXT:      s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      flat_store_dword v[2:3], v0
+; SELDAG-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT:      s_setpc_b64 s[30:31]
+; SELDAG-NEXT:  .LBB1_2:
+; SELDAG-NEXT:      s_mov_b64 exec, 0
+; SELDAG-NEXT:      s_endpgm
+
+; GISEL-LABEL: test_kill_block_order:
+; GISEL-NEXT:  ; %bb.0:
+; GISEL-NEXT:      s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      flat_load_dword v0, v[0:1]
+; GISEL-NEXT:      v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT:      v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT:      s_mov_b64 s[4:5], exec
+; GISEL-NEXT:      s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT:      s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT:      s_cbranch_scc0 .LBB1_2
+; GISEL-NEXT:  ; %bb.1:
+; GISEL-NEXT:      s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      flat_store_dword v[2:3], v0
+; GISEL-NEXT:      s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:      s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB1_2:
+; GISEL-NEXT:      s_mov_b64 exec, 0
+; GISEL-NEXT:      s_endpgm
+
+define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+cont:
+  store i32 %a, ptr %dst, align 4
+  ret void
+kill:
+  unreachable
+}

>From d58d9252441247eaca15c0273c788a6098c9337f Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Mon, 14 Apr 2025 03:32:41 -0500
Subject: [PATCH 9/9] implement feedback

---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp         | 12 +++---------
 .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 10 ++--------
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index fc5cb21d094ad..63c0425a3e432 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3039,16 +3039,10 @@ bool IRTranslator::translateCallBr(const User &U,
     if (!translateInlineAsm(I, MIRBuilder))
       return false;
   } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
-    switch (I.getIntrinsicID()) {
-    default:
-      report_fatal_error("Unsupported intrinsic for callbr");
-    case Intrinsic::amdgcn_kill:
-      if (!translateTargetIntrinsic(I, Intrinsic::amdgcn_kill, MIRBuilder))
-        return false;
-      break;
-    }
+    if (!translateTargetIntrinsic(I, I.getIntrinsicID(), MIRBuilder))
+      return false;
   } else {
-    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+    return false;
   }
 
   // Retrieve successors.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d5c7f315d6aff..501fe104d79e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3402,15 +3402,9 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
   if (I.isInlineAsm()) {
     visitInlineAsm(I);
   } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
-    switch (I.getIntrinsicID()) {
-    default:
-      report_fatal_error("Unsupported intrinsic for callbr");
-    case Intrinsic::amdgcn_kill:
-      visitCallBrIntrinsic(I);
-      break;
-    }
+    visitCallBrIntrinsic(I);
   } else {
-    report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+    report_fatal_error("only know how to handle inlineasm/intrinsic callbr");
   }
   CopyToExportRegsIfNeeded(&I);
 



More information about the llvm-commits mailing list