[llvm] [polly] [IR] Add CallBr intrinsics support (PR #133907)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 14:59:05 PDT 2025
https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/133907
>From 9117dcb4b3364aa5096e05c52371d21bb08d4417 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 17 Jul 2025 08:32:43 -0500
Subject: [PATCH 1/6] [AMDGPU][FixIrreducible][UnifyLoopExits] Support callbr
with inline-asm
First batch of changes to add support for basic inline-asm callbr for
the AMDGPU backend.
---
.../llvm/Support/GenericLoopInfoImpl.h | 2 +-
.../llvm/Transforms/Utils/BasicBlockUtils.h | 9 +-
.../llvm/Transforms/Utils/ControlFlowUtils.h | 35 +-
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 15 +-
.../lib/Transforms/Utils/ControlFlowUtils.cpp | 57 +-
llvm/lib/Transforms/Utils/FixIrreducible.cpp | 127 ++-
llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 72 +-
.../Transforms/FixIrreducible/bug45623.ll | 109 +++
llvm/test/Transforms/FixIrreducible/callbr.ll | 842 ++++++++++++++++++
llvm/test/Transforms/FixIrreducible/nested.ll | 676 ++++++++++++++
.../Transforms/FixIrreducible/unreachable.ll | 23 +
llvm/test/Transforms/UnifyLoopExits/basic.ll | 131 ++-
.../UnifyLoopExits/integer_guards.ll | 410 +++++++++
llvm/test/Transforms/UnifyLoopExits/nested.ll | 90 ++
.../Transforms/UnifyLoopExits/restore-ssa.ll | 236 +++++
.../Transforms/UnifyLoopExits/undef-phis.ll | 68 ++
16 files changed, 2850 insertions(+), 52 deletions(-)
create mode 100644 llvm/test/Transforms/FixIrreducible/callbr.ll
diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
index 6fc508b0e0cca..8b7927357d57d 100644
--- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h
+++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
if (BB == getHeader()) {
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
} else if (!OutsideLoopPreds.empty()) {
- // A non-header loop shouldn't be reachable from outside the loop,
+ // A non-header loop block shouldn't be reachable from outside the loop,
// though it is permitted if the predecessor is not itself actually
// reachable.
BlockT *EntryBB = &BB->getParent()->front();
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 979f3b3eb72ff..fc7b313eb552a 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -607,10 +607,17 @@ LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F,
// successors
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
-// Check whether the function only has simple terminator:
+template <typename... TermInst>
+LLVM_ABI bool hasOnlyGivenTerminators(const Function &F);
+
+// Check whether the function only has blocks with simple terminators:
// br/brcond/unreachable/ret
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators
+// (br/brcond/unreachable/ret) or callbr.
+LLVM_ABI bool hasOnlySimpleTerminatorOrCallBr(const Function &F);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
index 810fef29f4010..e55efbc907d42 100644
--- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
@@ -15,10 +15,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CycleInfo.h"
namespace llvm {
class BasicBlock;
+class CallBrInst;
+class LoopInfo;
class DomTreeUpdater;
/// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such
@@ -104,7 +107,8 @@ struct ControlFlowHub {
: BB(BB), Succ0(Succ0), Succ1(Succ1) {}
};
- void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) {
+ void addBranch(BasicBlock *BB, BasicBlock *Succ0,
+ BasicBlock *Succ1 = nullptr) {
assert(BB);
assert(Succ0 || Succ1);
Branches.emplace_back(BB, Succ0, Succ1);
@@ -118,6 +122,35 @@ struct ControlFlowHub {
std::optional<unsigned> MaxControlFlowBooleans = std::nullopt);
SmallVector<BranchDescriptor> Branches;
+
+ /**
+ * \brief Create a new intermediate target block for a callbr edge.
+ *
+ * This function creates a new basic block (the "target block") that sits
+ * between a callbr instruction and one of its successors. The callbr's
+ * successor is rewired to this new block, and the new block unconditionally
+ * branches to the original successor. This is useful for normalizing control
+ * flow, e.g., when transforming irreducible loops.
+ *
+ * \param CallBr The callbr instruction whose edge is to be split.
+ * \param Succ The original successor basic block to be reached.
+ * \param SuccIdx The index of the successor in the callbr instruction.
+ * \param AttachToCallBr If true, the new block is associated with the
+ * callbr's parent for loop/cycle info. If false, the new block is associated
+ * with the callbr's successor for loop/cycle info.
+ * \param CI Optional CycleInfo for updating cycle membership.
+ * \param DTU Optional DomTreeUpdater for updating the dominator tree.
+ * \param LI Optional LoopInfo for updating loop membership.
+ *
+ * \returns The newly created intermediate target block.
+ *
+ * \note This function updates PHI nodes, dominator tree, loop info, and cycle
+ * info as needed.
+ */
+ static BasicBlock *
+ createCallBrTarget(CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx,
+ bool AttachToCallBr = true, CycleInfo *CI = nullptr,
+ DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index c8255742c41ba..6103d07212fc2 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1766,12 +1766,21 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->swapSuccessors();
}
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+template <typename... TermInst>
+bool llvm::hasOnlyGivenTerminators(const Function &F) {
for (auto &BB : F) {
auto *Term = BB.getTerminator();
- if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
- isa<BranchInst>(Term)))
+ if (!(isa<TermInst>(Term) || ...))
return false;
}
return true;
}
+
+bool llvm::hasOnlySimpleTerminator(const Function &F) {
+ return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst>(F);
+}
+
+bool llvm::hasOnlySimpleTerminatorOrCallBr(const Function &F) {
+ return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst,
+ CallBrInst>(F);
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index 4b0065d0030cd..f7197a68813dd 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
@@ -282,7 +283,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
for (auto [BB, Succ0, Succ1] : Branches) {
#ifndef NDEBUG
- assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
+ assert(
+ (Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
+ "Duplicate entry for incoming block.");
#endif
if (Succ0)
Outgoing.insert(Succ0);
@@ -342,3 +345,55 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
return {FirstGuardBlock, true};
}
+
+BasicBlock *ControlFlowHub::createCallBrTarget(
+ CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx, bool AttachToCallBr,
+ CycleInfo *CI, DomTreeUpdater *DTU, LoopInfo *LI) {
+ BasicBlock *CallBrBlock = CallBr->getParent();
+ BasicBlock *CallBrTarget =
+ BasicBlock::Create(CallBrBlock->getContext(),
+ CallBrBlock->getName() + ".target." + Succ->getName(),
+ CallBrBlock->getParent());
+ // Rewire control flow from callbr to the new target block.
+ Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
+ CallBr->setSuccessor(SuccIdx, CallBrTarget);
+ // Jump from the new target block to the original successor.
+ BranchInst::Create(Succ, CallBrTarget);
+ if (LI) {
+ if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ); L) {
+ bool AddToLoop = true;
+ if (AttachToCallBr) {
+ // Check if the loops are disjoint. In that case, we do not add the
+ // intermediate target to any loop.
+ if (auto *LL = LI->getLoopFor(Succ);
+ LL && !L->contains(LL) && !LL->contains(L))
+ AddToLoop = false;
+ }
+ if (AddToLoop)
+ L->addBasicBlockToLoop(CallBrTarget, *LI);
+ }
+ }
+ if (CI) {
+ if (auto *C = CI->getCycle(AttachToCallBr ? CallBrBlock : Succ); C) {
+ bool AddToCycle = true;
+ if (AttachToCallBr) {
+ // Check if the cycles are disjoint. In that case, we do not add the
+ // intermediate target to any cycle.
+ if (auto *CC = CI->getCycle(Succ); CC) {
+ auto *CommonC = CI->getSmallestCommonCycle(C, CC);
+ if (CommonC != C && CommonC != CC)
+ AddToCycle = false;
+ }
+ }
+ if (AddToCycle)
+ CI->addBlockToCycle(CallBrTarget, C);
+ }
+ }
+ if (DTU) {
+ DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
+ if (DTU->getDomTree().dominates(CallBrBlock, Succ))
+ DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
+ {DominatorTree::Insert, CallBrTarget, Succ}});
+ }
+ return CallBrTarget;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 45e1d12c2bfff..ade23f942352d 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -79,6 +79,53 @@
// Limitation: The pass cannot handle switch statements and indirect
// branches. Both must be lowered to plain branches first.
//
+// CallBr support: CallBr is handled as a more general branch instruction which
+// can have multiple successors. The pass redirects the edges to intermediate
+// target blocks that unconditionally branch to the original callbr target
+// blocks. This allows the control flow hub to know which of the original
+// target blocks to jump to.
+// Example input CFG:
+// Entry (callbr)
+// / \
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// becomes:
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// | |
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// Finally, the intermediate targets are redirected through the hub.
+// OUTPUT CFG: Converted to a natural loop with a new header N.
+//
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// \ /
+// \ /
+// v v
+// N <---.
+// / \ \
+// / \ |
+// v v /
+// H --> B --'
+// |
+// v
+// Exit
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -231,6 +278,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
return false;
LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
ControlFlowHub CHub;
SetVector<BasicBlock *> Predecessors;
@@ -242,18 +290,33 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- // Exactly one of the two successors is the header.
- BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
- BasicBlock *Succ1 = Succ0 ? nullptr : Header;
- if (!Succ0)
- assert(Branch->getSuccessor(1) == Header);
- assert(Succ0 || Succ1);
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ // Exactly one of the two successors is the header.
+ BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
+ BasicBlock *Succ1 = Succ0 ? nullptr : Header;
+ if (!Succ0)
+ assert(Branch->getSuccessor(1) == Header);
+ assert(Succ0 || Succ1);
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
+ CallBr) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (Succ != Header)
+ continue;
+ BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, I, false, &CI, &DTU, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
// Redirect external incoming edges. This includes the edges on the header.
@@ -266,17 +329,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
+ CallBr) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (!C.contains(Succ))
+ continue;
+ BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, I, true, &CI, &DTU, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added external branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
// Redirect all the backedges through a "hub" consisting of a series
@@ -292,7 +370,6 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
SetVector<BasicBlock *> Entries;
Entries.insert(C.entry_rbegin(), C.entry_rend());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
CHub.finalize(&DTU, GuardBlocks, "irr");
#if defined(EXPENSIVE_CHECKS)
assert(DT.verify(DominatorTree::VerificationLevel::Full));
@@ -325,7 +402,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+ assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
bool Changed = false;
for (Cycle *TopCycle : CI.toplevel_cycles()) {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 9f338dbc78cff..51e5aaa5225e1 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -12,7 +12,11 @@
//
// Limitation: This assumes that all terminators in the CFG are direct branches
// (the "br" instruction). The presence of any other control flow
-// such as indirectbr, switch or callbr will cause an assert.
+// such as indirectbr or switch will cause an assert.
+// The callbr terminator is supported by creating intermediate
+// target blocks that unconditionally branch to the original target
+// blocks. These intermediate target blocks can then be redirected
+// through the ControlFlowHub as usual.
//
//===----------------------------------------------------------------------===//
@@ -150,25 +154,53 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SmallVector<BasicBlock *, 8> CallBrTargetBlocks;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
- for (auto *BB : ExitingBlocks) {
- auto *Branch = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = L->contains(Succ0) ? nullptr : Succ0;
-
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = L->contains(Succ1) ? nullptr : Succ1;
- CHub.addBranch(BB, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
- << (Succ0 ? Succ0->getName() : "<none>") << ", "
- << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+
+ for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
+ BasicBlock *BB = ExitingBlocks[I];
+ if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = L->contains(Succ0) ? nullptr : Succ0;
+
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = L->contains(Succ1) ? nullptr : Succ1;
+ CHub.addBranch(BB, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
+ << (Succ0 ? Succ0->getName() : "<none>") << ", "
+ << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator());
+ CallBr) {
+ for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
+ BasicBlock *Succ = CallBr->getSuccessor(J);
+ if (L->contains(Succ))
+ continue;
+ BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, J, false, nullptr, &DTU, &LI);
+ // ExitingBlocks is later used to restore SSA, so we need to make sure
+ // that the blocks used for phi nodes in the guard blocks match the
+ // predecessors of the guard blocks, which, in the case of callbr, are
+ // the new intermediate target blocks instead of the callbr blocks
+ // themselves.
+ ExitingBlocks[I] = NewSucc;
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ // Also add the new target block to the list of exiting blocks that
+ // should later be added to the parent loops.
+ CallBrTargetBlocks.push_back(NewSucc);
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
SmallVector<BasicBlock *, 8> GuardBlocks;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
BasicBlock *LoopExitBlock;
bool ChangedCFG;
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
@@ -186,11 +218,17 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
L->verifyLoop();
// The guard blocks were created outside the loop, so they need to become
- // members of the parent loop.
+ // members of the parent loop. Same goes for the callbr target blocks if they
+ // have not already been added to the respective parent loop by adding them to
+ // the original callbr target's loop.
if (auto ParentLoop = L->getParentLoop()) {
for (auto *G : GuardBlocks) {
ParentLoop->addBasicBlockToLoop(G, LI);
}
+ for (auto *C : CallBrTargetBlocks) {
+ if (!ParentLoop->contains(C))
+ ParentLoop->addBasicBlockToLoop(C, LI);
+ }
ParentLoop->verifyLoop();
}
@@ -218,7 +256,7 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+ assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
return runImpl(LI, DT);
}
diff --git a/llvm/test/Transforms/FixIrreducible/bug45623.ll b/llvm/test/Transforms/FixIrreducible/bug45623.ll
index 58724431ff0ee..57ebf0c47e515 100644
--- a/llvm/test/Transforms/FixIrreducible/bug45623.ll
+++ b/llvm/test/Transforms/FixIrreducible/bug45623.ll
@@ -90,3 +90,112 @@ for.end626: ; preds = %for.cond616
if.else629: ; preds = %backtrack
br label %retry
}
+
+define dso_local void @tre_tnfa_run_backtrack_callbr(i1 %arg) {
+; CHECK-LABEL: @tre_tnfa_run_backtrack_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETRY:%.*]] []
+; CHECK: retry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG:%.*]])
+; CHECK-NEXT: to label [[RETRY_TARGET_BACKTRACK:%.*]] [label %retry.target.while.body248]
+; CHECK: while.body248:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[IF_THEN250:%.*]] [label %if.end275]
+; CHECK: if.then250:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND264:%.*]] []
+; CHECK: for.cond264:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[FOR_BODY267:%.*]] [label %backtrack]
+; CHECK: for.body267:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND264]] []
+; CHECK: if.end275:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND342:%.*]] []
+; CHECK: for.cond342:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[FOR_BODY345:%.*]] [label %for.end580]
+; CHECK: for.body345:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND342]] []
+; CHECK: for.end580:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[BACKTRACK:%.*]] []
+; CHECK: backtrack:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[IF_THEN595:%.*]] [label %if.else629]
+; CHECK: if.then595:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND616:%.*]] []
+; CHECK: for.cond616:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[ARG]])
+; CHECK-NEXT: to label [[FOR_BODY619:%.*]] [label %for.end626]
+; CHECK: for.body619:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_COND616]] []
+; CHECK: for.end626:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[FOR_END626_TARGET_WHILE_BODY248:%.*]] []
+; CHECK: if.else629:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETRY]] []
+; CHECK: for.end626.target.while.body248:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: retry.target.backtrack:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: retry.target.while.body248:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_WHILE_BODY248:%.*]] = phi i1 [ true, [[FOR_END626_TARGET_WHILE_BODY248]] ], [ false, [[RETRY_TARGET_BACKTRACK]] ], [ true, [[RETRY_TARGET_WHILE_BODY248:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_WHILE_BODY248]], label [[WHILE_BODY248:%.*]], label [[BACKTRACK]]
+;
+entry:
+ callbr void asm "", ""() to label %retry []
+
+retry:
+ callbr void asm "", "r,!i"(i1 %arg) to label %backtrack [label %while.body248]
+
+while.body248: ; preds = %for.end626, %retry
+ callbr void asm "", "r,!i"(i1 %arg) to label %if.then250 [label %if.end275]
+
+if.then250: ; preds = %while.body248
+ callbr void asm "", ""() to label %for.cond264 []
+
+for.cond264: ; preds = %for.body267, %if.then250
+ callbr void asm "", "r,!i"(i1 %arg) to label %for.body267 [label %backtrack]
+
+for.body267: ; preds = %for.cond264
+ callbr void asm "", ""() to label %for.cond264 []
+
+if.end275: ; preds = %while.body248
+ callbr void asm "", ""() to label %for.cond342 []
+
+for.cond342: ; preds = %for.body345, %if.end275
+ callbr void asm "", "r,!i"(i1 %arg) to label %for.body345 [label %for.end580]
+
+for.body345: ; preds = %for.cond342
+ callbr void asm "", ""() to label %for.cond342 []
+
+for.end580: ; preds = %for.cond342
+ callbr void asm "", ""() to label %backtrack []
+
+backtrack: ; preds = %for.end580, %for.cond264, %retry
+ callbr void asm "", "r,!i"(i1 %arg) to label %if.then595 [label %if.else629]
+
+if.then595: ; preds = %backtrack
+ callbr void asm "", ""() to label %for.cond616 []
+
+for.cond616: ; preds = %for.body619, %if.then595
+ callbr void asm "", "r,!i"(i1 %arg) to label %for.body619 [label %for.end626]
+
+for.body619: ; preds = %for.cond616
+ callbr void asm "", ""() to label %for.cond616 []
+
+for.end626: ; preds = %for.cond616
+ callbr void asm "", ""() to label %while.body248 []
+
+if.else629: ; preds = %backtrack
+ callbr void asm "", ""() to label %retry []
+}
diff --git a/llvm/test/Transforms/FixIrreducible/callbr.ll b/llvm/test/Transforms/FixIrreducible/callbr.ll
new file mode 100644
index 0000000000000..7386b1000889b
--- /dev/null
+++ b/llvm/test/Transforms/FixIrreducible/callbr.ll
@@ -0,0 +1,842 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -fix-irreducible --verify-loop-info -S | FileCheck %s
+; RUN: opt < %s -passes='fix-irreducible,verify<loops>' -S | FileCheck %s
+; RUN: opt < %s -passes='verify<loops>,fix-irreducible,verify<loops>' -S | FileCheck %s
+; RUN: opt < %s -passes='print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefix LOOPS-BEFORE
+; RUN: opt < %s -passes='fix-irreducible,print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefix LOOPS-AFTER
+
+; LOOPS-BEFORE: Loop info for function 'callbr_entry':{{$}}
+; LOOPS-AFTER: Loop info for function 'callbr_entry':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%indirect,%fallthrough<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_entry_targets_with_phi_nodes':
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_entry_targets_with_phi_nodes':
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%block1<exiting>,%block<latch>
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_entry_multiple_indirect_targets':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_entry_multiple_indirect_targets':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%indirect,%fallthrough<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_entry_multiple_indirect_targets1':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_entry_multiple_indirect_targets1':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%irr.guard1,%indirect1,%irr.guard2,%indirect<latch>,%fallthrough<exiting>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard2<header>,%indirect<exiting>,%fallthrough<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_header_no_indirect':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_header_no_indirect':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%fallthrough<exiting>,%callbr,%callbr.target.fallthrough<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_header':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_header':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%fallthrough<exiting>,%callbr<exiting>,%callbr.target.fallthrough<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_header_multiple_indirect_targets':{{$}}
+; LOOPS-BEFORE-NEXT: Loop at depth 1 containing: %callbr<header><exiting>,%indirect1<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_header_multiple_indirect_targets':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%fallthrough<exiting>,%callbr<exiting>,%indirect1,%callbr.target.fallthrough<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %callbr<header><exiting>,%indirect1<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_regular':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_regular':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%fallthrough2<exiting>,%fallthrough1<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard1<header>,%indirect2<exiting>,%indirect1<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard2<header>,%nocallbr2<exiting>,%nocallbr1<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_regular1':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_regular1':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%callbr<exiting>,%nocallbr<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_regular2':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_regular2':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%callbr<exiting>,%nocallbr<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_header_and_regular':{{$}}
+; LOOPS-BEFORE-NEXT: Loop at depth 1 containing: %callbr_header<header>,%mid<exiting>,%callbr_regular<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_header_and_regular':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %callbr_header<header>,%callbr_header.target.callbr_regular,%callbr_header.target.mid,%irr.guard,%callbr_regular<latch>,%mid<exiting>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard<header>,%callbr_regular<exiting>,%mid<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_only':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_only':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%callbr_block<exiting>,%callbr_header,%callbr_header.target.callbr_block<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'entry_multiple_callbr':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'entry_multiple_callbr':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%irr.guard1,%cb2,%cb2.target.block,%cb2.target.block1,%irr.guard2,%block<latch>,%block1<exiting>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard2<header>,%block<exiting>,%block1<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_exit_with_separate_entries':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_exit_with_separate_entries':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%l2,%irr.guard1,%l1<latch>,%cb<exiting>,%cb.target.l1{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard1<header>,%l1<exiting>,%cb<exiting>,%cb.target.l1<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_exit_with_separate_entries1':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_exit_with_separate_entries1':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%loop2,%loop1<latch>,%cb<exiting>,%cb.target.loop2<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_only_multiple':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_only_multiple':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%irr.guard1,%cb3<exiting>,%cb3.target.cb1,%irr.guard2,%cb1,%cb1.target.cb3<latch>,%cb2,%cb2.target.cb1,%cb2.target.cb3<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard2<header>,%cb1<exiting>,%cb2<exiting>,%cb2.target.cb1<latch>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_bypass':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_bypass':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%l1,%irr.guard1,%cb,%cb.target.l1<latch>,%l2<exiting>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard1<header>,%cb<exiting>,%l2<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_multiple_with_exit':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_multiple_with_exit':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%irr.guard1,%l3<exiting>,%irr.guard2,%l1<latch>,%l2<exiting>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 2 containing: %irr.guard2<header>,%l1<exiting>,%l2<latch><exiting>{{$}}
+
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_nested':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_nested':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%bb<exiting>,%bh<latch>{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard1<header>,%b<exiting>,%h<latch>{{$}}
+
+; Fix the irreducible loop in which callbr is the entry (see the description at
+; the top of FixIrreducible.cpp).
+define void @callbr_entry(i1 %c) {
+; CHECK-LABEL: define void @callbr_entry(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %callbr.target.indirect]
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD:.*]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[FALLTHROUGH]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_INDIRECT:%.*]] = phi i1 [ true, %[[FALLTHROUGH]] ], [ false, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ true, %[[CALLBR_TARGET_INDIRECT]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT]], label %[[INDIRECT]], label %[[FALLTHROUGH]]
+;
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br i1 %c, label %indirect, label %ret
+indirect:
+ br label %fallthrough
+ret:
+ ret void
+}
+
+define i32 @callbr_entry_targets_with_phi_nodes(i1 %c) {
+; CHECK-LABEL: define i32 @callbr_entry_targets_with_phi_nodes(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_BLOCK:.*]] [label %entry.target.block1]
+; CHECK: [[BLOCK:.*]]:
+; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, %[[BLOCK1:.*]] ], [ [[A_MOVED:%.*]], %[[IRR_GUARD:.*]] ]
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[BLOCK1]]:
+; CHECK-NEXT: br i1 [[C]], label %[[BLOCK]], label %[[RET:.*]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret i32 [[B_MOVED:%.*]]
+; CHECK: [[ENTRY_TARGET_BLOCK]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_BLOCK1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[A_MOVED]] = phi i32 [ poison, %[[BLOCK]] ], [ 42, %[[ENTRY_TARGET_BLOCK]] ], [ poison, %[[ENTRY_TARGET_BLOCK1]] ]
+; CHECK-NEXT: [[B_MOVED]] = phi i32 [ [[A]], %[[BLOCK]] ], [ poison, %[[ENTRY_TARGET_BLOCK]] ], [ 43, %[[ENTRY_TARGET_BLOCK1]] ]
+; CHECK-NEXT: [[GUARD_BLOCK1:%.*]] = phi i1 [ true, %[[BLOCK]] ], [ false, %[[ENTRY_TARGET_BLOCK]] ], [ true, %[[ENTRY_TARGET_BLOCK1]] ]
+; CHECK-NEXT: br i1 [[GUARD_BLOCK1]], label %[[BLOCK1]], label %[[BLOCK]]
+;
+entry:
+ callbr void asm "", "!i"() to label %block [label %block1]
+block:
+ %a = phi i32 [42, %entry], [1, %block1]
+ br label %block1
+block1:
+ %b = phi i32 [43, %entry], [%a, %block]
+ br i1 %c, label %block, label %ret
+ret:
+ ret i32 %b
+}
+
+define void @callbr_entry_multiple_indirect_targets(i1 %c) {
+; CHECK-LABEL: define void @callbr_entry_multiple_indirect_targets(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i,!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %[[CALLBR_TARGET_INDIRECT:.*]], label %[[INDIRECT1:.*]], label %indirect2]
+; CHECK: [[INDIRECT3:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD:.*]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[INDIRECT3]]
+; CHECK: [[INDIRECT1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_INDIRECT:%.*]] = phi i1 [ true, %[[INDIRECT3]] ], [ true, %[[INDIRECT1]] ], [ false, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ true, %[[CALLBR_TARGET_INDIRECT]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT]], label %[[INDIRECT]], label %[[INDIRECT3]]
+;
+callbr:
+ callbr void asm "", "!i,!i,!i"() to label %fallthrough [label %indirect, label %indirect1, label %indirect2]
+fallthrough:
+ br i1 %c, label %indirect, label %ret
+indirect:
+ br label %fallthrough
+indirect1:
+ br label %indirect
+indirect2:
+ br label %ret
+ret:
+ ret void
+}
+
+define void @callbr_entry_multiple_indirect_targets1(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_entry_multiple_indirect_targets1(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i,!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %[[CALLBR_TARGET_INDIRECT:.*]], label %[[CALLBR_TARGET_INDIRECT1:.*]], label %indirect2]
+; CHECK: [[INDIRECT3:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD2:.*]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[INDIRECT3]], label %[[IRR_GUARD:.*]]
+; CHECK: [[INDIRECT1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_TARGET_INDIRECT1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_INDIRECT1:%.*]] = phi i1 [ true, %[[INDIRECT]] ], [ false, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ false, %[[CALLBR_TARGET_INDIRECT]] ], [ true, %[[CALLBR_TARGET_INDIRECT1]] ]
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ false, %[[INDIRECT]] ], [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ false, %[[CALLBR_TARGET_INDIRECT]] ], [ false, %[[CALLBR_TARGET_INDIRECT1]] ]
+; CHECK-NEXT: [[GUARD_FALLTHROUGH_INV:%.*]] = xor i1 [[GUARD_FALLTHROUGH]], true
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT1]], label %[[INDIRECT1]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_INDIRECT:%.*]] = phi i1 [ true, %[[INDIRECT3]] ], [ [[GUARD_FALLTHROUGH_INV]], %[[IRR_GUARD1]] ], [ true, %[[INDIRECT1]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT]], label %[[INDIRECT]], label %[[INDIRECT3]]
+;
+callbr:
+ callbr void asm "", "!i,!i,!i"() to label %fallthrough [label %indirect, label %indirect1, label %indirect2]
+fallthrough:
+ br i1 %c, label %indirect, label %ret
+indirect:
+ br i1 %d, label %fallthrough, label %indirect1
+indirect1:
+ br label %indirect
+indirect2:
+ br label %ret
+ret:
+ ret void
+}
+
+; Fix the irreducible loop in which callbr is the header (see the example at the
+; top of FixIrreducible.cpp).
+define void @callbr_header_no_indirect(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_header_no_indirect(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] []
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[CALLBR]], label %[[RET:.*]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ [[D_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH]], label %[[FALLTHROUGH]], label %[[CALLBR]]
+;
+ br i1 %d, label %callbr, label %fallthrough
+callbr:
+ callbr void asm "", ""() to label %fallthrough []
+fallthrough:
+ br i1 %c, label %callbr, label %ret
+ret:
+ ret void
+}
+
+; Fix the irreducible loop in which callbr is the header.
+define void @callbr_header(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_header(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %indirect]
+; CHECK: [[INDIRECT:.*:]]
+; CHECK-NEXT: br label %[[RET:.*]]
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[CALLBR]], label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ [[D_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH]], label %[[FALLTHROUGH]], label %[[CALLBR]]
+;
+ br i1 %d, label %callbr, label %fallthrough
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+indirect:
+ br label %ret
+fallthrough:
+ br i1 %c, label %callbr, label %ret
+ret:
+ ret void
+}
+
+define void @callbr_header_multiple_indirect_targets(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_header_multiple_indirect_targets(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[CALLBR_TARGET_FALLTHROUGH:.*]] [label %[[INDIRECT1:.*]], label %indirect1]
+; CHECK: [[INDIRECT1]]:
+; CHECK-NEXT: br label %[[RET:.*]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br label %[[CALLBR]]
+; CHECK: [[FALLTHROUGH:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[CALLBR]], label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_TARGET_FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH:%.*]] = phi i1 [ true, %[[CALLBR_TARGET_FALLTHROUGH]] ], [ [[D_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH]], label %[[FALLTHROUGH]], label %[[CALLBR]]
+;
+ br i1 %d, label %callbr, label %fallthrough
+callbr:
+ callbr void asm "", "!i,!i"() to label %fallthrough [label %indirect, label %indirect1]
+indirect:
+ br label %ret
+indirect1:
+ br label %callbr
+fallthrough:
+ br i1 %c, label %callbr, label %ret
+ret:
+ ret void
+}
+
+; Fix the three usual irreducible loops (callbr isn't part of any of them):
+; - fallthrough, fallthrough1, fallthrough2
+; - indirect, indirect1, indirect2
+; - nocallbr, nocallbr1, nocallbr2
+define void @callbr_regular(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_regular(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br i1 [[D]], label %[[CALLBR:.*]], label %[[NOCALLBR:.*]]
+; CHECK: [[CALLBR]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[FALLTHROUGH:.*]] [label %indirect]
+; CHECK: [[FALLTHROUGH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[FALLTHROUGH1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[FALLTHROUGH2:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[FALLTHROUGH1]], label %[[RET:.*]]
+; CHECK: [[INDIRECT:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[INDIRECT1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[INDIRECT2:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[INDIRECT1]], label %[[RET]]
+; CHECK: [[NOCALLBR]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2:.*]]
+; CHECK: [[NOCALLBR1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[NOCALLBR2:.*]]:
+; CHECK-NEXT: br i1 [[D]], label %[[NOCALLBR1]], label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_FALLTHROUGH2:%.*]] = phi i1 [ true, %[[FALLTHROUGH1]] ], [ [[C_INV]], %[[FALLTHROUGH]] ]
+; CHECK-NEXT: br i1 [[GUARD_FALLTHROUGH2]], label %[[FALLTHROUGH2]], label %[[FALLTHROUGH1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_INDIRECT2:%.*]] = phi i1 [ true, %[[INDIRECT1]] ], [ [[C_INV]], %[[INDIRECT]] ]
+; CHECK-NEXT: br i1 [[GUARD_INDIRECT2]], label %[[INDIRECT2]], label %[[INDIRECT1]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_NOCALLBR2:%.*]] = phi i1 [ true, %[[NOCALLBR1]] ], [ [[C_INV]], %[[NOCALLBR]] ]
+; CHECK-NEXT: br i1 [[GUARD_NOCALLBR2]], label %[[NOCALLBR2]], label %[[NOCALLBR1]]
+;
+ br i1 %d, label %callbr, label %nocallbr
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br i1 %c, label %fallthrough1, label %fallthrough2
+fallthrough1:
+ br label %fallthrough2
+fallthrough2:
+ br i1 %d, label %fallthrough1, label %ret
+indirect:
+ br i1 %c, label %indirect1, label %indirect2
+indirect1:
+ br label %indirect2
+indirect2:
+ br i1 %d, label %indirect1, label %ret
+nocallbr:
+ br i1 %c, label %nocallbr1, label %nocallbr2
+nocallbr1:
+ br label %nocallbr2
+nocallbr2:
+ br i1 %d, label %nocallbr1, label %ret
+ret:
+ ret void
+}
+
+; Fix an irreducible loop in which callbr is a regular block (neither entry nor
+; header) and only its indirect target stays in the loop. See the example at the top of FixIrreducible.cpp.
+define void @callbr_regular1(i1 %c) {
+; CHECK-LABEL: define void @callbr_regular1(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[NOCALLBR:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[RET:.*]] [label %nocallbr]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR:%.*]] = phi i1 [ true, %[[NOCALLBR]] ], [ [[C_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR]], label %[[CALLBR]], label %[[NOCALLBR]]
+;
+ br i1 %c, label %nocallbr, label %callbr
+nocallbr:
+ br label %callbr
+callbr:
+ callbr void asm "", "!i"() to label %ret [label %nocallbr]
+ret:
+ ret void
+}
+
+; Fix an irreducible loop in which callbr is a regular block (neither entry nor
+; header) and only its fallthrough target stays in the loop. See the example at the top of FixIrreducible.cpp.
+define void @callbr_regular2(i1 %c) {
+; CHECK-LABEL: define void @callbr_regular2(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[NOCALLBR:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[NOCALLBR]] [label %ret]
+; CHECK: [[RET:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR:%.*]] = phi i1 [ true, %[[NOCALLBR]] ], [ [[C_INV]], [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR]], label %[[CALLBR]], label %[[NOCALLBR]]
+;
+ br i1 %c, label %nocallbr, label %callbr
+nocallbr:
+ br label %callbr
+callbr:
+ callbr void asm "", "!i"() to label %nocallbr [label %ret]
+ret:
+ ret void
+}
+
+; Fix an irreducible loop with two callbr blocks, one as header and one as regular block.
+define void @callbr_header_and_regular(i1 %c) {
+; CHECK-LABEL: define void @callbr_header_and_regular(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: br label %[[CALLBR_HEADER:.*]]
+; CHECK: [[CALLBR_HEADER]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_HEADER_TARGET_MID:.*]] [label %callbr_header.target.callbr_regular]
+; CHECK: [[MID:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD:.*]], label %[[RET:.*]]
+; CHECK: [[CALLBR_REGULAR:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_HEADER]] [label %mid]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_HEADER_TARGET_MID]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_HEADER_TARGET_CALLBR_REGULAR:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR_REGULAR:%.*]] = phi i1 [ true, %[[MID]] ], [ false, %[[CALLBR_HEADER_TARGET_MID]] ], [ true, %[[CALLBR_HEADER_TARGET_CALLBR_REGULAR]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR_REGULAR]], label %[[CALLBR_REGULAR]], label %[[MID]]
+;
+ br label %callbr_header
+callbr_header:
+ callbr void asm "", "!i"() to label %mid [label %callbr_regular]
+mid:
+ br i1 %c, label %callbr_regular, label %ret
+callbr_regular:
+ callbr void asm "", "!i"() to label %callbr_header [label %mid]
+ret:
+ ret void
+}
+
+; Fix an irreducible loop consisting only of callbr blocks (and ret). See the
+; example at the top of FixIrreducible.cpp.
+define void @callbr_only(i1 %c) {
+; CHECK-LABEL: define void @callbr_only(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: [[CALLBR:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_ENTRY_TARGET_CALLBR_HEADER:.*]] [label %callbr_entry.target.callbr_block]
+; CHECK: [[CALLBR_HEADER:.*]]:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[CALLBR_HEADER_TARGET_CALLBR_BLOCK:.*]] []
+; CHECK: [[CALLBR_BLOCK:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CALLBR_HEADER]] [label %ret]
+; CHECK: [[RET:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[CALLBR_HEADER_TARGET_CALLBR_BLOCK]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CALLBR_ENTRY_TARGET_CALLBR_HEADER]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CALLBR_ENTRY_TARGET_CALLBR_BLOCK:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CALLBR_BLOCK:%.*]] = phi i1 [ true, %[[CALLBR_HEADER_TARGET_CALLBR_BLOCK]] ], [ false, %[[CALLBR_ENTRY_TARGET_CALLBR_HEADER]] ], [ true, %[[CALLBR_ENTRY_TARGET_CALLBR_BLOCK]] ]
+; CHECK-NEXT: br i1 [[GUARD_CALLBR_BLOCK]], label %[[CALLBR_BLOCK]], label %[[CALLBR_HEADER]]
+;
+callbr_entry:
+ callbr void asm "", "!i"() to label %callbr_header [label %callbr_block]
+callbr_header:
+ callbr void asm "", ""() to label %callbr_block []
+callbr_block:
+ callbr void asm "", "!i"() to label %callbr_header [label %ret]
+ret:
+ ret void
+}
+
+; Irreducible loop: entry leading to multiple callbr blocks.
+define void @entry_multiple_callbr(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @entry_multiple_callbr(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[A]], label %[[CB1:.*]], label %[[IRR_GUARD:.*]]
+; CHECK: [[CB1]]:
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[CB1_TARGET_BLOCK:.*]] [label %[[CB1_TARGET_CB2:.*]], label %cb1.target.block1]
+; CHECK: [[BLOCK:.*]]:
+; CHECK-NEXT: br i1 [[B]], label %[[IRR_GUARD]], label %[[BLOCK1:.*]]
+; CHECK: [[CB2:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB2_TARGET_BLOCK1:.*]] [label %cb2.target.block]
+; CHECK: [[BLOCK1]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD2:.*]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CB1_TARGET_BLOCK]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CB1_TARGET_CB2]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[CB1_TARGET_BLOCK1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CB2:%.*]] = phi i1 [ true, %[[BLOCK]] ], [ false, %[[CB1_TARGET_BLOCK]] ], [ true, %[[CB1_TARGET_CB2]] ], [ false, %[[CB1_TARGET_BLOCK1]] ], [ true, %[[ENTRY]] ]
+; CHECK-NEXT: [[GUARD_BLOCK:%.*]] = phi i1 [ false, %[[BLOCK]] ], [ true, %[[CB1_TARGET_BLOCK]] ], [ false, %[[CB1_TARGET_CB2]] ], [ false, %[[CB1_TARGET_BLOCK1]] ], [ false, %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB2]], label %[[CB2]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[CB2_TARGET_BLOCK1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[CB2_TARGET_BLOCK:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_BLOCK3:%.*]] = phi i1 [ true, %[[BLOCK1]] ], [ [[GUARD_BLOCK]], %[[IRR_GUARD1]] ], [ false, %[[CB2_TARGET_BLOCK1]] ], [ true, %[[CB2_TARGET_BLOCK]] ]
+; CHECK-NEXT: br i1 [[GUARD_BLOCK3]], label %[[BLOCK]], label %[[BLOCK1]]
+;
+entry:
+ br i1 %a, label %cb1, label %cb2
+cb1:
+ callbr void asm "", "!i,!i"() to label %block [label %cb2, label %block1]
+block:
+ br i1 %b, label %cb2, label %block1
+cb2:
+ callbr void asm "", "!i"() to label %block1 [label %block]
+block1:
+ br i1 %c, label %block, label %exit
+exit:
+ ret void
+}
+
+; Irreducible loop: callbr as loop exit, with multiple entries
+define void @callbr_exit_with_separate_entries(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_exit_with_separate_entries(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
+; CHECK-NEXT: [[A_INV:%.*]] = xor i1 [[A]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[L1:.*]]:
+; CHECK-NEXT: br i1 [[B]], label %[[CB:.*]], label %[[IRR_GUARD]]
+; CHECK: [[L2:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[CB]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[EXIT:.*]] [label %cb.target.l1]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_L2:%.*]] = phi i1 [ true, %[[L1]] ], [ [[A_INV]], %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_L2]], label %[[L2]], label %[[IRR_GUARD1]]
+; CHECK: [[CB_TARGET_L1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_L1:%.*]] = phi i1 [ true, %[[CB_TARGET_L1]] ], [ true, %[[IRR_GUARD]] ], [ [[C_INV]], %[[L2]] ]
+; CHECK-NEXT: br i1 [[GUARD_L1]], label %[[L1]], label %[[CB]]
+;
+entry:
+ br i1 %a, label %l1, label %l2
+l1:
+ br i1 %b, label %cb, label %l2
+l2:
+ br i1 %c, label %cb, label %l1
+cb:
+ callbr void asm "", "!i"() to label %exit [label %l1]
+exit:
+ ret void
+}
+
+define void @callbr_exit_with_separate_entries1(i1 %a, i1 %b) {
+; CHECK-LABEL: define void @callbr_exit_with_separate_entries1(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[A_INV:%.*]] = xor i1 [[A]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[LOOP1:.*]]:
+; CHECK-NEXT: br i1 [[B]], label %[[CB:.*]], label %[[IRR_GUARD]]
+; CHECK: [[LOOP2:.*]]:
+; CHECK-NEXT: br label %[[LOOP1]]
+; CHECK: [[CB]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[EXIT:.*]] [label %cb.target.loop2]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CB_TARGET_LOOP2:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_LOOP2:%.*]] = phi i1 [ true, %[[CB_TARGET_LOOP2]] ], [ true, %[[LOOP1]] ], [ [[A_INV]], %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_LOOP2]], label %[[LOOP2]], label %[[LOOP1]]
+;
+entry:
+ br i1 %a, label %loop1, label %loop2
+loop1:
+ br i1 %b, label %cb, label %loop2
+loop2:
+ br label %loop1
+cb:
+ callbr void asm "", "!i"() to label %exit [label %loop2]
+exit:
+ ret void
+}
+
+; Irreducible loop: all blocks are callbrs, with cross-edges
+define void @callbr_only_multiple(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_only_multiple(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_CB1:.*]] [label %[[ENTRY_TARGET_CB2:.*]], label %entry.target.cb3]
+; CHECK: [[CB1:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB2:.*]] [label %cb1.target.cb3]
+; CHECK: [[CB2]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB2_TARGET_CB3:.*]] [label %cb2.target.cb1]
+; CHECK: [[CB3:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[CB3_TARGET_CB1:.*]] [label %exit]
+; CHECK: [[EXIT:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[CB2_TARGET_CB3]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CB1_TARGET_CB3:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_CB1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_CB2]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_CB3:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_CB3:%.*]] = phi i1 [ true, %[[CB2_TARGET_CB3]] ], [ true, %[[CB1_TARGET_CB3]] ], [ false, %[[ENTRY_TARGET_CB1]] ], [ false, %[[ENTRY_TARGET_CB2]] ], [ true, %[[ENTRY_TARGET_CB3]] ]
+; CHECK-NEXT: [[GUARD_CB1:%.*]] = phi i1 [ false, %[[CB2_TARGET_CB3]] ], [ false, %[[CB1_TARGET_CB3]] ], [ true, %[[ENTRY_TARGET_CB1]] ], [ false, %[[ENTRY_TARGET_CB2]] ], [ false, %[[ENTRY_TARGET_CB3]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB3]], label %[[CB3]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2:.*]]
+; CHECK: [[CB2_TARGET_CB1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[CB3_TARGET_CB1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_CB13:%.*]] = phi i1 [ true, %[[CB2_TARGET_CB1]] ], [ [[GUARD_CB1]], %[[IRR_GUARD1]] ], [ true, %[[CB3_TARGET_CB1]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB13]], label %[[CB1]], label %[[CB2]]
+; Input CFG: every terminator is a callbr. %entry can jump to each of cb1, cb2 and cb3, and those three blocks form a cycle (cb1 to cb2 to cb3 to cb1, plus cb1 to cb3 and cb2 to cb1). %a, %b and %c are unused.
+entry:
+ callbr void asm "", "!i,!i"() to label %cb1 [label %cb2, label %cb3]
+cb1:
+ callbr void asm "", "!i"() to label %cb2 [label %cb3]
+cb2:
+ callbr void asm "", "!i"() to label %cb3 [label %cb1]
+cb3:
+ callbr void asm "", "!i"() to label %cb1 [label %exit]
+exit:
+ ret void
+}
+
+; Irreducible loop: callbr as a "bypass" block
+define void @callbr_bypass(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_bypass(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[B_INV:%.*]] = xor i1 [[B]], true
+; CHECK-NEXT: [[A_INV:%.*]] = xor i1 [[A]], true
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[CB:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[L2:.*]] [label %cb.target.l1]
+; CHECK: [[L1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[L2]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD1]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CB_TARGET_L1:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_L1:%.*]] = phi i1 [ true, %[[CB_TARGET_L1]] ], [ [[A_INV]], %[[ENTRY]] ]
+; CHECK-NEXT: br i1 [[GUARD_L1]], label %[[L1]], label %[[IRR_GUARD1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_CB:%.*]] = phi i1 [ true, %[[L2]] ], [ true, %[[IRR_GUARD]] ], [ [[B_INV]], %[[L1]] ]
+; CHECK-NEXT: br i1 [[GUARD_CB]], label %[[CB]], label %[[L2]]
+; Input CFG: %entry branches to both cb and l1, so the cycle made of cb, l1 and l2 has two entry blocks. Only cb ends in a callbr; l1 and l2 use conditional br.
+entry:
+ br i1 %a, label %cb, label %l1
+cb:
+ callbr void asm "", "!i"() to label %l2 [label %l1]
+l1:
+ br i1 %b, label %l2, label %cb
+l2:
+ br i1 %c, label %cb, label %exit
+exit:
+ ret void
+}
+
+; Irreducible loop: callbr with multiple indirect targets, some looping, some exiting
+define void @callbr_multiple_with_exit(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: define void @callbr_multiple_with_exit(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]], i1 [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i,!i,!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_L1:.*]] [label %[[ENTRY_TARGET_L2:.*]], label %[[EXIT:.*]], label %entry.target.l3]
+; CHECK: [[L1:.*]]:
+; CHECK-NEXT: br i1 [[A]], label %[[L2:.*]], label %[[IRR_GUARD:.*]]
+; CHECK: [[L2]]:
+; CHECK-NEXT: br i1 [[B]], label %[[IRR_GUARD2:.*]], label %[[EXIT]]
+; CHECK: [[L3:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[IRR_GUARD2]], label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+; CHECK: [[ENTRY_TARGET_L1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_L2]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[ENTRY_TARGET_L3:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_L3:%.*]] = phi i1 [ true, %[[L1]] ], [ false, %[[ENTRY_TARGET_L1]] ], [ false, %[[ENTRY_TARGET_L2]] ], [ true, %[[ENTRY_TARGET_L3]] ]
+; CHECK-NEXT: [[GUARD_L1:%.*]] = phi i1 [ false, %[[L1]] ], [ true, %[[ENTRY_TARGET_L1]] ], [ false, %[[ENTRY_TARGET_L2]] ], [ false, %[[ENTRY_TARGET_L3]] ]
+; CHECK-NEXT: br i1 [[GUARD_L3]], label %[[L3]], label %[[IRR_GUARD1:.*]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: br label %[[IRR_GUARD2]]
+; CHECK: [[IRR_GUARD2]]:
+; CHECK-NEXT: [[GUARD_L13:%.*]] = phi i1 [ true, %[[L2]] ], [ [[GUARD_L1]], %[[IRR_GUARD1]] ], [ true, %[[L3]] ]
+; CHECK-NEXT: br i1 [[GUARD_L13]], label %[[L1]], label %[[L2]]
+; Input CFG: the callbr in %entry has four destinations: l1, l2 and l3, which form a cycle (l1 to l2 and back, l1 to l3 and back), and %exit, which lies outside it. Inside the cycle only conditional br is used.
+entry:
+ callbr void asm "", "!i,!i,!i"() to label %l1 [label %l2, label %exit, label %l3]
+l1:
+ br i1 %a, label %l2, label %l3
+l2:
+ br i1 %b, label %l1, label %exit
+l3:
+ br i1 %c, label %l1, label %exit
+exit:
+ ret void
+}
+
+define void @callbr_nested(i1 %c, i1 %d) {
+; CHECK-LABEL: define void @callbr_nested(
+; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[ENTRY_TARGET_H:.*]] [label %entry.target.b]
+; CHECK: [[H:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1:.*]]
+; CHECK: [[B:.*]]:
+; CHECK-NEXT: callbr void asm "", "!i,!i"()
+; CHECK-NEXT: to label %[[H]] [label %[[B_TARGET_BH:.*]], label %b.target.bb]
+; CHECK: [[BH:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD:.*]]
+; CHECK: [[BB:.*]]:
+; CHECK-NEXT: br i1 [[C]], label %[[BH]], label %[[RET:.*]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+; CHECK: [[B_TARGET_BH]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[B_TARGET_BB:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD]]
+; CHECK: [[IRR_GUARD]]:
+; CHECK-NEXT: [[GUARD_BB:%.*]] = phi i1 [ true, %[[BH]] ], [ false, %[[B_TARGET_BH]] ], [ true, %[[B_TARGET_BB]] ]
+; CHECK-NEXT: br i1 [[GUARD_BB]], label %[[BB]], label %[[BH]]
+; CHECK: [[ENTRY_TARGET_H]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[ENTRY_TARGET_B:.*]]:
+; CHECK-NEXT: br label %[[IRR_GUARD1]]
+; CHECK: [[IRR_GUARD1]]:
+; CHECK-NEXT: [[GUARD_B:%.*]] = phi i1 [ true, %[[H]] ], [ false, %[[ENTRY_TARGET_H]] ], [ true, %[[ENTRY_TARGET_B]] ]
+; CHECK-NEXT: br i1 [[GUARD_B]], label %[[B]], label %[[H]]
+; Input CFG: two two-block cycles, each entered at both of its blocks by a callbr: {h, b} from %entry and {bh, bb} from %b. %d is unused.
+entry:
+ callbr void asm "","!i"() to label %h [label %b]
+h:
+ br label %b
+b:
+ callbr void asm "","!i,!i"() to label %h [label %bh, label %bb]
+bh:
+ br label %bb
+bb:
+ br i1 %c, label %bh, label %ret
+ret:
+ ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; LOOPS-AFTER: {{.*}}
+; LOOPS-BEFORE: {{.*}}
diff --git a/llvm/test/Transforms/FixIrreducible/nested.ll b/llvm/test/Transforms/FixIrreducible/nested.ll
index 0cc6b473d62f6..c9161cc14f208 100644
--- a/llvm/test/Transforms/FixIrreducible/nested.ll
+++ b/llvm/test/Transforms/FixIrreducible/nested.ll
@@ -50,6 +50,69 @@ exit:
ret void
}
+define void @nested_irr_top_level_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5) {
+; CHECK-LABEL: @nested_irr_top_level_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_A1:%.*]] [label %entry.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[A1_TARGET_B1:%.*]] [label %A1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[B1_TARGET_B2:%.*]] [label %A3]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %exit]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: entry.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[ENTRY_TARGET_A1]] ], [ true, [[ENTRY_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: B1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: A1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: A1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[B1_TARGET_B2]] ], [ false, [[A1_TARGET_B1]] ], [ true, [[A1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+; Input CFG (callbr only): the cycle formed by A1, B1, B2, A3 and A2 is entered from %entry at both A1 and A2; the inner cycle formed by B1 and B2 is entered from A1 at both of its blocks.
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %B2 [label %A3]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %B1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %A1 [label %exit]
+
+exit:
+ ret void
+}
+
define void @nested_irr_in_loop(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
; CHECK-LABEL: @nested_irr_in_loop(
; CHECK-NEXT: entry:
@@ -107,6 +170,80 @@ exit:
ret void
}
+define void @nested_irr_in_loop_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
+; CHECK-LABEL: @nested_irr_in_loop_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[H1:%.*]]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H1_TARGET_A1:%.*]] [label %H1.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[A1_TARGET_B1:%.*]] [label %A1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[B1_TARGET_B2:%.*]] [label %A3]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %L1]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %L1]
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[EXIT:%.*]] [label %H1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[H1_TARGET_A1]] ], [ true, [[H1_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: B1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: A1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: A1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[B1_TARGET_B2]] ], [ false, [[A1_TARGET_B1]] ], [ true, [[A1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+; Input CFG: a loop with header H1 and latch L1 (L1 either leaves to %exit or jumps back to H1). Inside it, H1 enters the cycle formed by A1, B1, B2, A3 and A2 at both A1 and A2, and A1 enters the inner cycle formed by B1 and B2 at both of its blocks.
+entry:
+ br label %H1
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %B2 [label %A3]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %B1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %A2 [label %L1]
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %A1 [label %L1]
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %exit [label %H1]
+
+exit:
+ ret void
+}
+
define void @loop_in_irr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
; CHECK-LABEL: @loop_in_irr(
; CHECK-NEXT: entry:
@@ -150,6 +287,60 @@ exit:
ret void
}
+define void @loop_in_irr_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
+; CHECK-LABEL: @loop_in_irr_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_A1:%.*]] [label %entry.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H1:%.*]] []
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L1:%.*]] []
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A1:%.*]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: entry.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[ENTRY_TARGET_A1]] ], [ true, [[ENTRY_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; Input CFG: %entry enters the cycle formed by A1, H1, L1, A3 and A2 at both A1 and A2. H1 and L1 form an inner loop whose only entry is H1. A callbr with an empty indirect-destination list has a single successor.
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", ""() to label %H1 []
+
+H1:
+ callbr void asm "", ""() to label %L1 []
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %H1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", ""() to label %A1 []
+
+exit:
+ ret void
+}
+
define void @loop_in_irr_shared_entry(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
; CHECK-LABEL: @loop_in_irr_shared_entry(
; CHECK-NEXT: entry:
@@ -188,6 +379,54 @@ exit:
ret void
}
+define void @loop_in_irr_shared_entry_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
+; CHECK-LABEL: @loop_in_irr_shared_entry_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_H1:%.*]] [label %entry.target.A2]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L1:%.*]] []
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A3_TARGET_A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H1]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A3.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: entry.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A3_TARGET_A2]] ], [ false, [[ENTRY_TARGET_H1]] ], [ true, [[ENTRY_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[H1]]
+; Input CFG: %entry enters the cycle formed by H1, L1, A3 and A2 at both H1 (default destination) and A2 (indirect destination). H1 is also the header of the inner loop formed by H1 and L1.
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %H1 [label %A2]
+
+H1:
+ callbr void asm "", ""() to label %L1 []
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %H1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", ""() to label %H1 []
+
+exit:
+ ret void
+}
+
define void @loop_in_irr_shared_header(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
; CHECK-LABEL: @loop_in_irr_shared_header(
; CHECK-NEXT: entry:
@@ -226,6 +465,56 @@ exit:
ret void
}
+define void @loop_in_irr_shared_header_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2) {
+; CHECK-LABEL: @loop_in_irr_shared_header_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[ENTRY_TARGET_A2:%.*]] [label %entry.target.H1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L1:%.*]] []
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[L1_TARGET_H1:%.*]] [label %A3]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A2:%.*]] [label %exit]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A2_TARGET_H1:%.*]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A2.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: L1.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: entry.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_H1:%.*]] = phi i1 [ true, [[A2_TARGET_H1]] ], [ true, [[L1_TARGET_H1]] ], [ false, [[ENTRY_TARGET_A2]] ], [ true, [[ENTRY_TARGET_H1:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_H1]], label [[H1:%.*]], label [[A2]]
+; Input CFG: %entry enters the cycle formed by H1, L1, A3 and A2 at both A2 (default destination) and H1 (indirect destination). H1 is also the header of the inner loop formed by H1 and L1.
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A2 [label %H1]
+
+H1:
+ callbr void asm "", ""() to label %L1 []
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %H1 [label %A3]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %exit]
+
+A2:
+ callbr void asm "", ""() to label %H1 []
+
+exit:
+ ret void
+}
+
define void @loop_irr_loop_shared_header(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3) {
; CHECK-LABEL: @loop_irr_loop_shared_header(
; CHECK-NEXT: entry:
@@ -269,6 +558,62 @@ exit:
ret void
}
+define void @loop_irr_loop_shared_header_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3) {
+; CHECK-LABEL: @loop_irr_loop_shared_header_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H2:%.*]] []
+; CHECK: H2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H2_TARGET_A2:%.*]] [label %H2.target.H1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[A3:%.*]] [label %H1.target.H1]
+; CHECK: A3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A2:%.*]] [label %L2]
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A2_TARGET_H1:%.*]] []
+; CHECK: L2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[H2]] [label %exit]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A2.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H2.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H2.target.H1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_H1:%.*]] = phi i1 [ true, [[A2_TARGET_H1]] ], [ true, [[H1_TARGET_H1:%.*]] ], [ false, [[H2_TARGET_A2]] ], [ true, [[H2_TARGET_H1:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_H1]], label [[H1:%.*]], label [[A2]]
+; Input CFG: outer loop with header H2 and latch L2. H2 enters the cycle formed by H1, A3 and A2 at both A2 and H1; H1 additionally loops to itself through its indirect destination.
+entry:
+ callbr void asm "", ""() to label %H2 []
+
+H2:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %A2 [label %H1]
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %A3 [label %H1]
+
+A3:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A2 [label %L2]
+
+A2:
+ callbr void asm "", ""() to label %H1 []
+
+L2:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %H2 [label %exit]
+
+exit:
+ ret void
+}
+
define void @siblings_top_level(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
; CHECK-LABEL: @siblings_top_level(
; CHECK-NEXT: entry:
@@ -336,6 +681,93 @@ exit:
ret void
}
+define void @siblings_top_level_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6) {
+; CHECK-LABEL: @siblings_top_level_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H1:%.*]] [label %fork1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1_TARGET_A1:%.*]] [label %H1.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A1_TARGET_A2:%.*]] []
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %L1]
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[H1]] [label %exit]
+; CHECK: fork1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[FORK1_TARGET_B1:%.*]] [label %fork1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H2:%.*]] []
+; CHECK: H2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L2:%.*]] []
+; CHECK: L2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[H2]] [label %L2.target.B2]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %exit]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A1_TARGET_A2]] ], [ false, [[H1_TARGET_A1]] ], [ true, [[H1_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: L2.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: fork1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: fork1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[L2_TARGET_B2:%.*]] ], [ false, [[FORK1_TARGET_B1]] ], [ true, [[FORK1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+; Input CFG: two sibling regions reached from %entry. The loop from H1 to L1 contains the cycle formed by A1 and A2, entered from H1 at both blocks. Separately, fork1 enters the cycle formed by B1, H2, L2 and B2 at both B1 and B2; H2 and L2 form a loop inside that cycle.
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %H1 [label %fork1]
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", ""() to label %A2 []
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A1 [label %L1]
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %H1 [label %exit]
+
+fork1:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", ""() to label %H2 []
+
+H2:
+ callbr void asm "", ""() to label %L2 []
+
+L2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %H2 [label %B2]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %B1 [label %exit]
+
+exit:
+ ret void
+}
+
define void @siblings_in_loop(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7) {
; CHECK-LABEL: @siblings_in_loop(
; CHECK-NEXT: entry:
@@ -413,6 +845,105 @@ exit:
ret void
}
+define void @siblings_in_loop_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7) {
+; CHECK-LABEL: @siblings_in_loop_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H0:%.*]] []
+; CHECK: H0:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[H1:%.*]] [label %fork1]
+; CHECK: H1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[H1_TARGET_A1:%.*]] [label %H1.target.A2]
+; CHECK: A1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A1_TARGET_A2:%.*]] []
+; CHECK: A2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[A1:%.*]] [label %L1]
+; CHECK: L1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[H1]] [label %L0]
+; CHECK: fork1:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[FORK1_TARGET_B1:%.*]] [label %fork1.target.B2]
+; CHECK: B1:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[H2:%.*]] []
+; CHECK: H2:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[L2:%.*]] []
+; CHECK: L2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[H2]] [label %L2.target.B2]
+; CHECK: B2:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[B1:%.*]] [label %L0]
+; CHECK: L0:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED7:%.*]])
+; CHECK-NEXT: to label [[EXIT:%.*]] [label %H0]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: H1.target.A1:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: H1.target.A2:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_A2:%.*]] = phi i1 [ true, [[A1_TARGET_A2]] ], [ false, [[H1_TARGET_A1]] ], [ true, [[H1_TARGET_A2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_A2]], label [[A2:%.*]], label [[A1]]
+; CHECK: L2.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1:%.*]]
+; CHECK: fork1.target.B1:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: fork1.target.B2:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_B2:%.*]] = phi i1 [ true, [[L2_TARGET_B2:%.*]] ], [ false, [[FORK1_TARGET_B1]] ], [ true, [[FORK1_TARGET_B2:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_B2]], label [[B2:%.*]], label [[B1]]
+; Input CFG: an outer loop with header H0 and latch L0 encloses two sibling regions. The loop from H1 to L1 contains the cycle formed by A1 and A2, entered from H1 at both blocks. fork1 enters the cycle formed by B1, H2, L2 and B2 at both B1 and B2; H2 and L2 form a loop inside that cycle. L1 and B2 both leave to L0.
+entry:
+ callbr void asm "", ""() to label %H0 []
+
+H0:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %H1 [label %fork1]
+
+H1:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %A1 [label %A2]
+
+A1:
+ callbr void asm "", ""() to label %A2 []
+
+A2:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %A1 [label %L1]
+
+L1:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %H1 [label %L0]
+
+fork1:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %B1 [label %B2]
+
+B1:
+ callbr void asm "", ""() to label %H2 []
+
+H2:
+ callbr void asm "", ""() to label %L2 []
+
+L2:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %H2 [label %B2]
+
+B2:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %B1 [label %L0]
+
+L0:
+ callbr void asm "", "r,!i"(i1 %Pred7) to label %exit [label %H0]
+
+exit:
+ ret void
+}
+
define void @irr_in_irr_shared_entry(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7, i1 %Pred8, i1 %Pred9, i1 %Pred10, i1 %Pred11, i1 %Pred12, i1 %Pred13) {
; CHECK-LABEL: @irr_in_irr_shared_entry(
; CHECK-NEXT: entry:
@@ -527,3 +1058,148 @@ if.end8.i:
exit:
ret void
}
+
+define void @irr_in_irr_shared_entry_callbr(i1 %Pred0, i1 %Pred1, i1 %Pred2, i1 %Pred3, i1 %Pred4, i1 %Pred5, i1 %Pred6, i1 %Pred7, i1 %Pred8, i1 %Pred9, i1 %Pred10, i1 %Pred11, i1 %Pred12, i1 %Pred13) {
+; CHECK-LABEL: @irr_in_irr_shared_entry_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED0:%.*]])
+; CHECK-NEXT: to label [[IF_END:%.*]] [label %if.then]
+; CHECK: if.end:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED1:%.*]])
+; CHECK-NEXT: to label [[IF_THEN7:%.*]] [label %if.else]
+; CHECK: if.then7:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[IF_END16:%.*]] []
+; CHECK: if.else:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[IF_END16]] []
+; CHECK: if.end16:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED2:%.*]])
+; CHECK-NEXT: to label [[WHILE_COND_PREHEADER:%.*]] [label %if.then39]
+; CHECK: while.cond.preheader:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[WHILE_COND:%.*]] []
+; CHECK: while.cond:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED3:%.*]])
+; CHECK-NEXT: to label [[WHILE_COND_TARGET_COND_TRUE49:%.*]] [label %lor.rhs]
+; CHECK: cond.true49:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED4:%.*]])
+; CHECK-NEXT: to label [[IF_THEN69:%.*]] [label %cond.true49.target.while.body63]
+; CHECK: while.body63:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED5:%.*]])
+; CHECK-NEXT: to label [[EXIT:%.*]] [label %while.cond47]
+; CHECK: while.cond47:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED6:%.*]])
+; CHECK-NEXT: to label [[COND_TRUE49:%.*]] [label %while.cond47.target.cond.end61]
+; CHECK: cond.end61:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED7:%.*]])
+; CHECK-NEXT: to label [[COND_END61_TARGET_WHILE_BODY63:%.*]] [label %while.cond]
+; CHECK: if.then69:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED8:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %while.cond]
+; CHECK: lor.rhs:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED9:%.*]])
+; CHECK-NEXT: to label [[LOR_RHS_TARGET_COND_END61:%.*]] [label %while.end76]
+; CHECK: while.end76:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[EXIT]] []
+; CHECK: if.then39:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED10:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end.i145]
+; CHECK: if.end.i145:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED11:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end8.i149]
+; CHECK: if.end8.i149:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[EXIT]] []
+; CHECK: if.then:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED12:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end.i]
+; CHECK: if.end.i:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PRED13:%.*]])
+; CHECK-NEXT: to label [[EXIT]] [label %if.end8.i]
+; CHECK: if.end8.i:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[EXIT]] []
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: while.cond47.target.cond.end61:
+; CHECK-NEXT: br label [[IRR_GUARD:%.*]]
+; CHECK: lor.rhs.target.cond.end61:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: while.cond.target.cond.true49:
+; CHECK-NEXT: br label [[IRR_GUARD]]
+; CHECK: irr.guard:
+; CHECK-NEXT: [[GUARD_COND_END61:%.*]] = phi i1 [ true, [[WHILE_COND47_TARGET_COND_END61:%.*]] ], [ true, [[LOR_RHS_TARGET_COND_END61]] ], [ false, [[WHILE_COND_TARGET_COND_TRUE49]] ]
+; CHECK-NEXT: br i1 [[GUARD_COND_END61]], label [[COND_END61:%.*]], label [[IRR_GUARD1:%.*]]
+; CHECK: cond.true49.target.while.body63:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: cond.end61.target.while.body63:
+; CHECK-NEXT: br label [[IRR_GUARD1]]
+; CHECK: irr.guard1:
+; CHECK-NEXT: [[GUARD_WHILE_BODY63:%.*]] = phi i1 [ true, [[COND_TRUE49_TARGET_WHILE_BODY63:%.*]] ], [ true, [[COND_END61_TARGET_WHILE_BODY63]] ], [ false, [[IRR_GUARD]] ]
+; CHECK-NEXT: br i1 [[GUARD_WHILE_BODY63]], label [[WHILE_BODY63:%.*]], label [[COND_TRUE49]]
+; Input CFG: within the loop headed by while.cond, the blocks cond.true49, while.body63, while.cond47 and cond.end61 form a cycle that is entered at cond.true49 (from while.cond) and at cond.end61 (from lor.rhs). if.then39, if.then and their successors never enter the loop.
+entry:
+ callbr void asm "", "r,!i"(i1 %Pred0) to label %if.end [label %if.then]
+
+if.end:
+ callbr void asm "", "r,!i"(i1 %Pred1) to label %if.then7 [label %if.else]
+
+if.then7:
+ callbr void asm "", ""() to label %if.end16 []
+
+if.else:
+ callbr void asm "", ""() to label %if.end16 []
+
+if.end16:
+ callbr void asm "", "r,!i"(i1 %Pred2) to label %while.cond.preheader [label %if.then39]
+
+while.cond.preheader:
+ callbr void asm "", ""() to label %while.cond []
+
+while.cond:
+ callbr void asm "", "r,!i"(i1 %Pred3) to label %cond.true49 [label %lor.rhs]
+
+cond.true49:
+ callbr void asm "", "r,!i"(i1 %Pred4) to label %if.then69 [label %while.body63]
+
+while.body63:
+ callbr void asm "", "r,!i"(i1 %Pred5) to label %exit [label %while.cond47]
+
+while.cond47:
+ callbr void asm "", "r,!i"(i1 %Pred6) to label %cond.true49 [label %cond.end61]
+
+cond.end61:
+ callbr void asm "", "r,!i"(i1 %Pred7) to label %while.body63 [label %while.cond]
+
+if.then69:
+ callbr void asm "", "r,!i"(i1 %Pred8) to label %exit [label %while.cond]
+
+lor.rhs:
+ callbr void asm "", "r,!i"(i1 %Pred9) to label %cond.end61 [label %while.end76]
+
+while.end76:
+ callbr void asm "", ""() to label %exit []
+
+if.then39:
+ callbr void asm "", "r,!i"(i1 %Pred10) to label %exit [label %if.end.i145]
+
+if.end.i145:
+ callbr void asm "", "r,!i"(i1 %Pred11) to label %exit [label %if.end8.i149]
+
+if.end8.i149:
+ callbr void asm "", ""() to label %exit []
+
+if.then:
+ callbr void asm "", "r,!i"(i1 %Pred12) to label %exit [label %if.end.i]
+
+if.end.i:
+ callbr void asm "", "r,!i"(i1 %Pred13) to label %exit [label %if.end8.i]
+
+if.end8.i:
+ callbr void asm "", ""() to label %exit []
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/FixIrreducible/unreachable.ll b/llvm/test/Transforms/FixIrreducible/unreachable.ll
index defbefb3ba812..845cf507c7fc0 100644
--- a/llvm/test/Transforms/FixIrreducible/unreachable.ll
+++ b/llvm/test/Transforms/FixIrreducible/unreachable.ll
@@ -25,3 +25,26 @@ loop.latch:
loop.exit:
ret void
}
+
+; CHECK-LABEL: @unreachable_callbr(
+; CHECK: entry:
+; CHECK-NOT: irr.guard:
+define void @unreachable_callbr(i32 %n, i1 %arg) {
+entry: ; the only edge from %entry leads to loop.body; %n is unused
+ callbr void asm "", ""() to label %loop.body []
+
+loop.body:
+ callbr void asm "", ""() to label %inner.block []
+
+unreachable.block: ; no predecessors; it jumps into inner.block, and the assertions above require that no irr.guard block appears
+ callbr void asm "", ""() to label %inner.block []
+
+inner.block:
+ callbr void asm "", "r,!i"(i1 %arg) to label %loop.exit [label %loop.latch]
+
+loop.latch: ; jumps back to loop.body, closing the loop formed by loop.body, inner.block and loop.latch
+ callbr void asm "", ""() to label %loop.body []
+
+loop.exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/basic.ll b/llvm/test/Transforms/UnifyLoopExits/basic.ll
index ccd15d4e6b943..d04d142f196d3 100644
--- a/llvm/test/Transforms/UnifyLoopExits/basic.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/basic.ll
@@ -18,12 +18,12 @@ define void @loop_1(i1 %PredEntry, i1 %PredB, i1 %PredC, i1 %PredD) {
; CHECK: F:
; CHECK-NEXT: br label [[EXIT]]
; CHECK: G:
-; CHECK-NEXT: br label [[F:%.*]]
+; CHECK-NEXT: br label [[Y:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
; CHECK: loop.exit.guard:
-; CHECK-NEXT: [[GUARD_E:%.*]] = phi i1 [ true, [[B]] ], [ false, [[C]] ], [ false, [[D]] ]
-; CHECK-NEXT: br i1 [[GUARD_E]], label [[E:%.*]], label [[F]]
+; CHECK-NEXT: [[GUARD_X:%.*]] = phi i1 [ true, [[B]] ], [ false, [[C]] ], [ false, [[D]] ]
+; CHECK-NEXT: br i1 [[GUARD_X]], label [[X:%.*]], label [[Y]]
;
entry:
br i1 %PredEntry, label %A, label %G
@@ -53,6 +53,67 @@ exit:
ret void
}
+define void @loop_1_callbr(i1 %PredEntry, i1 %PredB, i1 %PredC, i1 %PredD) {
+; CHECK-LABEL: @loop_1_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]]
+; CHECK: A:
+; CHECK-NEXT: br label [[B:%.*]]
+; CHECK: B:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[C:%.*]] [label %B.target.E]
+; CHECK: C:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; CHECK-NEXT: to label [[D:%.*]] [label %C.target.F]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDD:%.*]])
+; CHECK-NEXT: to label [[A]] [label %D.target.F]
+; CHECK: E:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: F:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: G:
+; CHECK-NEXT: br label [[Y:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: B.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.F:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: D.target.F:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[GUARD_X:%.*]] = phi i1 [ true, [[B_TARGET_E:%.*]] ], [ false, [[C_TARGET_F:%.*]] ], [ false, [[D_TARGET_F:%.*]] ]
+; CHECK-NEXT: br i1 [[GUARD_X]], label [[X:%.*]], label [[Y]]
+; Input: loop A -> B -> C -> D -> A whose exiting edges are all callbr indirect targets (B -> E, C -> F, D -> F); G also reaches F without entering the loop.
+entry:
+ br i1 %PredEntry, label %A, label %G ; enter the loop at A, or bypass it through G
+
+A:
+ br label %B
+
+B:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %C [label %E] ; fallthrough %C stays in the loop, indirect target %E exits
+
+C:
+ callbr void asm "", "r,!i"(i1 %PredC) to label %D [label %F] ; fallthrough %D stays in the loop, indirect target %F exits
+
+D:
+ callbr void asm "", "r,!i"(i1 %PredD) to label %A [label %F] ; fallthrough is the back edge to A, indirect target %F exits
+
+E: ; exit block; the assertions above bind it to the pattern variable X
+ br label %exit
+
+F: ; exit block shared by C and D; bound to the pattern variable Y above
+ br label %exit
+
+G:
+ br label %F ; predecessor of F from outside the loop
+
+exit:
+ ret void
+}
+
define void @loop_2(i1 %PredA, i1 %PredB, i1 %PredC) {
; CHECK-LABEL: @loop_2(
; CHECK-NEXT: entry:
@@ -107,3 +168,67 @@ Z:
exit:
ret void
}
+
+define void @loop_2_callbr(i1 %PredA, i1 %PredB, i1 %PredC) {
+; CHECK-LABEL: @loop_2_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[A:%.*]]
+; CHECK: A:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.X]
+; CHECK: B:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[C:%.*]] [label %B.target.Y]
+; CHECK: C:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; CHECK-NEXT: to label [[D:%.*]] [label %C.target.Z]
+; CHECK: D:
+; CHECK-NEXT: br label [[A]]
+; CHECK: X:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: Y:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: Z:
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: A.target.X:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: B.target.Y:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: C.target.Z:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[GUARD_X:%.*]] = phi i1 [ true, [[A_TARGET_X:%.*]] ], [ false, [[B_TARGET_Y:%.*]] ], [ false, [[C_TARGET_Z:%.*]] ]
+; CHECK-NEXT: [[GUARD_Y:%.*]] = phi i1 [ false, [[A_TARGET_X]] ], [ true, [[B_TARGET_Y]] ], [ false, [[C_TARGET_Z]] ]
+; CHECK-NEXT: br i1 [[GUARD_X]], label [[X:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: br i1 [[GUARD_Y]], label [[Y:%.*]], label [[Z:%.*]]
+; Input: loop A -> B -> C -> D -> A with three distinct exit blocks (X, Y, Z), each reached through a callbr indirect target.
+entry:
+ br label %A
+
+A:
+ callbr void asm "", "r,!i"(i1 %PredA) to label %B [label %X] ; fallthrough %B stays in the loop, indirect target %X exits
+
+B:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %C [label %Y] ; fallthrough %C stays in the loop, indirect target %Y exits
+
+C:
+ callbr void asm "", "r,!i"(i1 %PredC) to label %D [label %Z] ; fallthrough %D stays in the loop, indirect target %Z exits
+
+D:
+ br label %A ; back edge
+
+X:
+ br label %exit
+
+Y:
+ br label %exit
+
+Z:
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll b/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll
index f55639ff2db37..be982d5d043f9 100644
--- a/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/integer_guards.ll
@@ -71,6 +71,85 @@ E:
ret void
}
+define void @loop_two_exits_callbr(i1 %PredEntry, i1 %PredA) {
+; CHECK-LABEL: @loop_two_exits_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[E:%.*]]
+; CHECK: A:
+; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[C:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; CHECK: B:
+; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; CHECK-NEXT: br label [[D:%.*]]
+; CHECK: C:
+; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.E]
+; CHECK: D:
+; CHECK-NEXT: unreachable
+; CHECK: E:
+; CHECK-NEXT: ret void
+; CHECK: A.target.B:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MERGED_BB_IDX:%.*]] = phi i32 [ 0, [[A_TARGET_B]] ], [ 1, [[C_TARGET_E:%.*]] ]
+; CHECK-NEXT: [[B_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 0
+; CHECK-NEXT: br i1 [[B_PREDICATE]], label [[B:%.*]], label [[E]]
+;
+; BOOLEAN-LABEL: @loop_two_exits_callbr(
+; BOOLEAN-NEXT: entry:
+; BOOLEAN-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[E:%.*]]
+; BOOLEAN: A:
+; BOOLEAN-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[C:%.*]] ]
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; BOOLEAN-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; BOOLEAN: B:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[D:%.*]]
+; BOOLEAN: C:
+; BOOLEAN-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; BOOLEAN-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; BOOLEAN-NEXT: to label [[A]] [label %C.target.E]
+; BOOLEAN: D:
+; BOOLEAN-NEXT: unreachable
+; BOOLEAN: E:
+; BOOLEAN-NEXT: ret void
+; BOOLEAN: A.target.B:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; BOOLEAN: C.target.E:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: loop.exit.guard:
+; BOOLEAN-NEXT: [[GUARD_B:%.*]] = phi i1 [ true, [[A_TARGET_B]] ], [ false, [[C_TARGET_E:%.*]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_B]], label [[B:%.*]], label [[E]]
+; Input: loop {A, C} with two exits: A leaves through its callbr fallthrough (to B), C through its callbr indirect target (to E).
+entry:
+ br i1 %PredEntry, label %A, label %E ; enter the loop or skip straight to E
+
+A:
+ %inc1 = phi i32 [ 0, %entry ], [ %inc2, %C ] ; loop counter, incremented in C
+ callbr void asm "", "r,!i"(i1 %PredA) to label %B [label %C] ; here the fallthrough (%B) is the exiting edge; %C continues the loop
+
+B:
+ tail call fastcc void @check(i32 1) #0
+ br label %D
+
+C:
+ %inc2 = add i32 %inc1, 1
+ %cmp = icmp ult i32 %inc2, 10
+ callbr void asm "","r,!i"(i1 %cmp) to label %A [label %E] ; fallthrough is the back edge to A, indirect target %E exits
+
+D:
+ unreachable
+
+E:
+ ret void
+}
+
; The loop exit blocks appear in an inner loop.
define void @inner_loop(i1 %PredEntry, i1 %PredA, i1 %PredB) {
@@ -196,6 +275,164 @@ I:
ret void
}
+define void @inner_loop_callbr(i1 %PredEntry, i1 %PredA, i1 %PredB) {
+; CHECK-LABEL: @inner_loop_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]]
+; CHECK: A:
+; CHECK-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OUTER2:%.*]], [[G:%.*]] ]
+; CHECK-NEXT: br label [[B:%.*]]
+; CHECK: B:
+; CHECK-NEXT: [[INNER1:%.*]] = phi i32 [ 0, [[A]] ], [ [[INNER2:%.*]], [[F:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[D:%.*]] [label %B.target.B.target.C]
+; CHECK: C:
+; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; CHECK-NEXT: br label [[H:%.*]]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[D_TARGET_D_TARGET_E:%.*]] [label %F]
+; CHECK: E:
+; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; CHECK-NEXT: br label [[H]]
+; CHECK: F:
+; CHECK-NEXT: [[INNER2]] = add i32 [[INNER1]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[INNER2]], 20
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B]] [label %F.target.G]
+; CHECK: G:
+; CHECK-NEXT: [[OUTER2]] = add i32 [[OUTER1]], 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[OUTER2]], 10
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP2]])
+; CHECK-NEXT: to label [[A]] [label %G.target.I]
+; CHECK: H:
+; CHECK-NEXT: unreachable
+; CHECK: I:
+; CHECK-NEXT: ret void
+; CHECK: B.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: D.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: G.target.I:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MERGED_BB_IDX:%.*]] = phi i32 [ 0, [[B_TARGET_C:%.*]] ], [ 1, [[D_TARGET_E:%.*]] ], [ 2, [[G_TARGET_I:%.*]] ]
+; CHECK-NEXT: [[C_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 0
+; CHECK-NEXT: br i1 [[C_PREDICATE]], label [[C:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[E_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 1
+; CHECK-NEXT: br i1 [[E_PREDICATE]], label [[E:%.*]], label [[I]]
+; CHECK: B.target.B.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2:%.*]]
+; CHECK: D.target.D.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; CHECK: F.target.G:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; CHECK: loop.exit.guard2:
+; CHECK-NEXT: [[MERGED_BB_IDX4:%.*]] = phi i32 [ 0, [[B_TARGET_B_TARGET_C:%.*]] ], [ 1, [[D_TARGET_D_TARGET_E]] ], [ 2, [[F_TARGET_G:%.*]] ]
+; CHECK-NEXT: [[B_TARGET_C_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX4]], 0
+; CHECK-NEXT: br i1 [[B_TARGET_C_PREDICATE]], label [[B_TARGET_C]], label [[LOOP_EXIT_GUARD3:%.*]]
+; CHECK: loop.exit.guard3:
+; CHECK-NEXT: [[D_TARGET_E_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX4]], 1
+; CHECK-NEXT: br i1 [[D_TARGET_E_PREDICATE]], label [[D_TARGET_E]], label [[G]]
+;
+; BOOLEAN-LABEL: @inner_loop_callbr(
+; BOOLEAN-NEXT: entry:
+; BOOLEAN-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]]
+; BOOLEAN: A:
+; BOOLEAN-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OUTER2:%.*]], [[G:%.*]] ]
+; BOOLEAN-NEXT: br label [[B:%.*]]
+; BOOLEAN: B:
+; BOOLEAN-NEXT: [[INNER1:%.*]] = phi i32 [ 0, [[A]] ], [ [[INNER2:%.*]], [[F:%.*]] ]
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; BOOLEAN-NEXT: to label [[D:%.*]] [label %B.target.B.target.C]
+; BOOLEAN: C:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[H:%.*]]
+; BOOLEAN: D:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; BOOLEAN-NEXT: to label [[D_TARGET_D_TARGET_E:%.*]] [label %F]
+; BOOLEAN: E:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[H]]
+; BOOLEAN: F:
+; BOOLEAN-NEXT: [[INNER2]] = add i32 [[INNER1]], 1
+; BOOLEAN-NEXT: [[CMP1:%.*]] = icmp ult i32 [[INNER2]], 20
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; BOOLEAN-NEXT: to label [[B]] [label %F.target.G]
+; BOOLEAN: G:
+; BOOLEAN-NEXT: [[OUTER2]] = add i32 [[OUTER1]], 1
+; BOOLEAN-NEXT: [[CMP2:%.*]] = icmp ult i32 [[OUTER2]], 10
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP2]])
+; BOOLEAN-NEXT: to label [[A]] [label %G.target.I]
+; BOOLEAN: H:
+; BOOLEAN-NEXT: unreachable
+; BOOLEAN: I:
+; BOOLEAN-NEXT: ret void
+; BOOLEAN: B.target.C:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; BOOLEAN: D.target.E:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: G.target.I:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: loop.exit.guard:
+; BOOLEAN-NEXT: [[GUARD_C:%.*]] = phi i1 [ true, [[B_TARGET_C:%.*]] ], [ false, [[D_TARGET_E:%.*]] ], [ false, [[G_TARGET_I:%.*]] ]
+; BOOLEAN-NEXT: [[GUARD_E:%.*]] = phi i1 [ false, [[B_TARGET_C]] ], [ true, [[D_TARGET_E]] ], [ false, [[G_TARGET_I]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_C]], label [[C:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; BOOLEAN: loop.exit.guard1:
+; BOOLEAN-NEXT: br i1 [[GUARD_E]], label [[E:%.*]], label [[I]]
+; BOOLEAN: B.target.B.target.C:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD2:%.*]]
+; BOOLEAN: D.target.D.target.E:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; BOOLEAN: F.target.G:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD2]]
+; BOOLEAN: loop.exit.guard2:
+; BOOLEAN-NEXT: [[GUARD_B_TARGET_C:%.*]] = phi i1 [ true, [[B_TARGET_B_TARGET_C:%.*]] ], [ false, [[D_TARGET_D_TARGET_E]] ], [ false, [[F_TARGET_G:%.*]] ]
+; BOOLEAN-NEXT: [[GUARD_D_TARGET_E:%.*]] = phi i1 [ false, [[B_TARGET_B_TARGET_C]] ], [ true, [[D_TARGET_D_TARGET_E]] ], [ false, [[F_TARGET_G]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_B_TARGET_C]], label [[B_TARGET_C]], label [[LOOP_EXIT_GUARD3:%.*]]
+; BOOLEAN: loop.exit.guard3:
+; BOOLEAN-NEXT: br i1 [[GUARD_D_TARGET_E]], label [[D_TARGET_E]], label [[G]]
+; Input: inner loop {B, D, F} nested in outer loop {A, B, D, F, G}; C and E are reached from inside the inner loop, so those edges leave both loops at once.
+entry:
+ br i1 %PredEntry, label %A, label %I ; enter the outer loop or skip straight to I
+
+A:
+ %outer1 = phi i32 [ 0, %entry ], [ %outer2, %G ] ; outer-loop counter, incremented in G
+ br label %B
+
+B:
+ %inner1 = phi i32 [ 0, %A ], [ %inner2, %F ] ; inner-loop counter, incremented in F
+ callbr void asm "", "r,!i"(i1 %PredA) to label %D [label %C] ; indirect target %C leaves both loops (split twice in the expected output: B.target.B.target.C, then B.target.C)
+
+C:
+ tail call fastcc void @check(i32 1) #0
+ br label %H
+
+D:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %E [label %F] ; fallthrough %E leaves both loops, indirect target %F continues the inner loop
+
+E:
+ tail call fastcc void @check(i32 2) #0
+ br label %H
+
+F:
+ %inner2 = add i32 %inner1, 1
+ %cmp1 = icmp ult i32 %inner2, 20
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %G] ; fallthrough is the inner back edge; %G leaves the inner loop only
+
+G:
+ %outer2 = add i32 %outer1, 1
+ %cmp2 = icmp ult i32 %outer2, 10
+ callbr void asm "", "r,!i"(i1 %cmp2) to label %A [label %I] ; fallthrough is the outer back edge; indirect target %I exits the outer loop
+
+H:
+ unreachable
+
+I:
+ ret void
+}
+
; A loop with more exit blocks.
define void @loop_five_exits(i1 %PredEntry, i1 %PredA, i1 %PredB, i1 %PredC, i1 %PredD) {
@@ -341,6 +578,179 @@ L:
ret void
}
+define void @loop_five_exits_callbr(i1 %PredEntry, i1 %PredA, i1 %PredB, i1 %PredC, i1 %PredD) {
+; CHECK-LABEL: @loop_five_exits_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[L:%.*]]
+; CHECK: A:
+; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[I:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; CHECK-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; CHECK: B:
+; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; CHECK-NEXT: br label [[J:%.*]]
+; CHECK: C:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; CHECK-NEXT: to label [[C_TARGET_D:%.*]] [label %E]
+; CHECK: D:
+; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; CHECK-NEXT: br label [[J]]
+; CHECK: E:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; CHECK-NEXT: to label [[E_TARGET_F:%.*]] [label %G]
+; CHECK: F:
+; CHECK-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]]
+; CHECK-NEXT: br label [[K:%.*]]
+; CHECK: G:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDD:%.*]])
+; CHECK-NEXT: to label [[G_TARGET_H:%.*]] [label %I]
+; CHECK: H:
+; CHECK-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]]
+; CHECK-NEXT: br label [[K]]
+; CHECK: I:
+; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %I.target.L]
+; CHECK: J:
+; CHECK-NEXT: br label [[L]]
+; CHECK: K:
+; CHECK-NEXT: br label [[L]]
+; CHECK: L:
+; CHECK-NEXT: ret void
+; CHECK: A.target.B:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: E.target.F:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: G.target.H:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: I.target.L:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MERGED_BB_IDX:%.*]] = phi i32 [ 0, [[A_TARGET_B]] ], [ 1, [[C_TARGET_D]] ], [ 2, [[E_TARGET_F]] ], [ 3, [[G_TARGET_H]] ], [ 4, [[I_TARGET_L:%.*]] ]
+; CHECK-NEXT: [[B_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 0
+; CHECK-NEXT: br i1 [[B_PREDICATE]], label [[B:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[D_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 1
+; CHECK-NEXT: br i1 [[D_PREDICATE]], label [[D:%.*]], label [[LOOP_EXIT_GUARD2:%.*]]
+; CHECK: loop.exit.guard2:
+; CHECK-NEXT: [[F_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 2
+; CHECK-NEXT: br i1 [[F_PREDICATE]], label [[F:%.*]], label [[LOOP_EXIT_GUARD3:%.*]]
+; CHECK: loop.exit.guard3:
+; CHECK-NEXT: [[H_PREDICATE:%.*]] = icmp eq i32 [[MERGED_BB_IDX]], 3
+; CHECK-NEXT: br i1 [[H_PREDICATE]], label [[H:%.*]], label [[L]]
+;
+; BOOLEAN-LABEL: @loop_five_exits_callbr(
+; BOOLEAN-NEXT: entry:
+; BOOLEAN-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[L:%.*]]
+; BOOLEAN: A:
+; BOOLEAN-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC2:%.*]], [[I:%.*]] ]
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA:%.*]])
+; BOOLEAN-NEXT: to label [[A_TARGET_B:%.*]] [label %C]
+; BOOLEAN: B:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[J:%.*]]
+; BOOLEAN: C:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB:%.*]])
+; BOOLEAN-NEXT: to label [[C_TARGET_D:%.*]] [label %E]
+; BOOLEAN: D:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[J]]
+; BOOLEAN: E:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDC:%.*]])
+; BOOLEAN-NEXT: to label [[E_TARGET_F:%.*]] [label %G]
+; BOOLEAN: F:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[K:%.*]]
+; BOOLEAN: G:
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[PREDD:%.*]])
+; BOOLEAN-NEXT: to label [[G_TARGET_H:%.*]] [label %I]
+; BOOLEAN: H:
+; BOOLEAN-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]]
+; BOOLEAN-NEXT: br label [[K]]
+; BOOLEAN: I:
+; BOOLEAN-NEXT: [[INC2]] = add i32 [[INC1]], 1
+; BOOLEAN-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC2]], 10
+; BOOLEAN-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; BOOLEAN-NEXT: to label [[A]] [label %I.target.L]
+; BOOLEAN: J:
+; BOOLEAN-NEXT: br label [[L]]
+; BOOLEAN: K:
+; BOOLEAN-NEXT: br label [[L]]
+; BOOLEAN: L:
+; BOOLEAN-NEXT: ret void
+; BOOLEAN: A.target.B:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; BOOLEAN: C.target.D:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: E.target.F:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: G.target.H:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: I.target.L:
+; BOOLEAN-NEXT: br label [[LOOP_EXIT_GUARD]]
+; BOOLEAN: loop.exit.guard:
+; BOOLEAN-NEXT: [[GUARD_B:%.*]] = phi i1 [ true, [[A_TARGET_B]] ], [ false, [[C_TARGET_D]] ], [ false, [[E_TARGET_F]] ], [ false, [[G_TARGET_H]] ], [ false, [[I_TARGET_L:%.*]] ]
+; BOOLEAN-NEXT: [[GUARD_D:%.*]] = phi i1 [ false, [[A_TARGET_B]] ], [ true, [[C_TARGET_D]] ], [ false, [[E_TARGET_F]] ], [ false, [[G_TARGET_H]] ], [ false, [[I_TARGET_L]] ]
+; BOOLEAN-NEXT: [[GUARD_F:%.*]] = phi i1 [ false, [[A_TARGET_B]] ], [ false, [[C_TARGET_D]] ], [ true, [[E_TARGET_F]] ], [ false, [[G_TARGET_H]] ], [ false, [[I_TARGET_L]] ]
+; BOOLEAN-NEXT: [[GUARD_H:%.*]] = phi i1 [ false, [[A_TARGET_B]] ], [ false, [[C_TARGET_D]] ], [ false, [[E_TARGET_F]] ], [ true, [[G_TARGET_H]] ], [ false, [[I_TARGET_L]] ]
+; BOOLEAN-NEXT: br i1 [[GUARD_B]], label [[B:%.*]], label [[LOOP_EXIT_GUARD1:%.*]]
+; BOOLEAN: loop.exit.guard1:
+; BOOLEAN-NEXT: br i1 [[GUARD_D]], label [[D:%.*]], label [[LOOP_EXIT_GUARD2:%.*]]
+; BOOLEAN: loop.exit.guard2:
+; BOOLEAN-NEXT: br i1 [[GUARD_F]], label [[F:%.*]], label [[LOOP_EXIT_GUARD3:%.*]]
+; BOOLEAN: loop.exit.guard3:
+; BOOLEAN-NEXT: br i1 [[GUARD_H]], label [[H:%.*]], label [[L]]
+; Input: loop {A, C, E, G, I} with five exit blocks: B, D, F and H via callbr fallthroughs, L via the indirect target of the callbr in I.
+entry:
+ br i1 %PredEntry, label %A, label %L ; enter the loop or skip straight to L
+
+A:
+ %inc1 = phi i32 [ 0, %entry ], [ %inc2, %I ] ; loop counter, incremented in I
+ callbr void asm "", "r,!i"(i1 %PredA) to label %B [label %C] ; fallthrough %B exits, indirect target %C continues the loop
+
+B:
+ tail call fastcc void @check(i32 1) #0
+ br label %J
+
+C:
+ callbr void asm "", "r,!i"(i1 %PredB) to label %D [label %E] ; fallthrough %D exits, indirect target %E continues the loop
+
+D:
+ tail call fastcc void @check(i32 2) #0
+ br label %J
+
+E:
+ callbr void asm "", "r,!i"(i1 %PredC) to label %F [label %G] ; fallthrough %F exits, indirect target %G continues the loop
+
+F:
+ tail call fastcc void @check(i32 3) #0
+ br label %K
+
+G:
+ callbr void asm "", "r,!i"(i1 %PredD) to label %H [label %I] ; fallthrough %H exits, indirect target %I continues the loop
+
+H:
+ tail call fastcc void @check(i32 4) #0
+ br label %K
+
+I:
+ %inc2 = add i32 %inc1, 1
+ %cmp = icmp ult i32 %inc2, 10
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %L] ; fallthrough is the back edge to A, indirect target %L exits
+
+J: ; join point for exits B and D
+ br label %L
+
+K: ; join point for exits F and H
+ br label %L
+
+L:
+ ret void
+}
+
declare void @check(i32 noundef %i) #0
diff --git a/llvm/test/Transforms/UnifyLoopExits/nested.ll b/llvm/test/Transforms/UnifyLoopExits/nested.ll
index 8fae2c4349a7b..f79eea2b20c8b 100644
--- a/llvm/test/Transforms/UnifyLoopExits/nested.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/nested.ll
@@ -78,3 +78,93 @@ exit:
%exit.phi = phi i32 [%A4.phi, %A5], [%Z, %C]
ret void
}
+
+define void @nested_callbr(i1 %PredB3, i1 %PredB4, i1 %PredA4, i1 %PredA3, i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @nested_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[A1:%.*]]
+; CHECK: A1:
+; CHECK-NEXT: br label [[B1:%.*]]
+; CHECK: B1:
+; CHECK-NEXT: br label [[B2:%.*]]
+; CHECK: B2:
+; CHECK-NEXT: [[X_INC:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: br label [[B3:%.*]]
+; CHECK: B3:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB3:%.*]])
+; CHECK-NEXT: to label [[B4:%.*]] [label %B3.target.A3]
+; CHECK: B4:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDB4:%.*]])
+; CHECK-NEXT: to label [[B1]] [label %B4.target.A2]
+; CHECK: A2:
+; CHECK-NEXT: br label [[A4:%.*]]
+; CHECK: A3:
+; CHECK-NEXT: br label [[A4]]
+; CHECK: A4:
+; CHECK-NEXT: [[A4_PHI:%.*]] = phi i32 [ [[Y:%.*]], [[A3:%.*]] ], [ [[X_INC_MOVED:%.*]], [[A2:%.*]] ]
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA4:%.*]])
+; CHECK-NEXT: to label [[A4_TARGET_C:%.*]] [label %A5]
+; CHECK: A5:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[PREDA3:%.*]])
+; CHECK-NEXT: to label [[A5_TARGET_EXIT:%.*]] [label %A1]
+; CHECK: C:
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[EXIT_PHI:%.*]] = phi i32 [ [[Z:%.*]], [[C:%.*]] ], [ [[EXIT_PHI_MOVED:%.*]], [[LOOP_EXIT_GUARD:%.*]] ]
+; CHECK-NEXT: ret void
+; CHECK: A4.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: A5.target.exit:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[EXIT_PHI_MOVED]] = phi i32 [ poison, [[A4_TARGET_C]] ], [ [[A4_PHI]], [[A5_TARGET_EXIT]] ]
+; CHECK-NEXT: [[GUARD_C:%.*]] = phi i1 [ true, [[A4_TARGET_C]] ], [ false, [[A5_TARGET_EXIT]] ]
+; CHECK-NEXT: br i1 [[GUARD_C]], label [[C]], label [[EXIT]]
+; CHECK: B3.target.A3:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD1:%.*]]
+; CHECK: B4.target.A2:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD1]]
+; CHECK: loop.exit.guard1:
+; CHECK-NEXT: [[X_INC_MOVED]] = phi i32 [ [[X_INC]], [[B3_TARGET_A3:%.*]] ], [ [[X_INC]], [[B4_TARGET_A2:%.*]] ]
+; CHECK-NEXT: [[GUARD_A3:%.*]] = phi i1 [ true, [[B3_TARGET_A3]] ], [ false, [[B4_TARGET_A2]] ]
+; CHECK-NEXT: br i1 [[GUARD_A3]], label [[A3]], label [[A2]]
+; Input: inner loop {B1, B2, B3, B4} with exits A3 and A2, inside an outer loop closed by the A5 -> A1 edge with exits C and exit.
+entry:
+ br label %A1
+
+A1:
+ br label %B1
+
+B1:
+ br label %B2
+
+B2:
+ %X.inc = add i32 %X, 1 ; defined inside the inner loop, consumed by the phi in A4 outside it
+ br label %B3
+
+B3:
+ callbr void asm "", "r,!i"(i1 %PredB3) to label %B4 [label %A3] ; fallthrough %B4 stays in the inner loop, indirect target %A3 leaves it
+
+B4:
+ callbr void asm "", "r,!i"(i1 %PredB4) to label %B1 [label %A2] ; fallthrough is the inner back edge, indirect target %A2 leaves the inner loop
+
+A2:
+ br label %A4
+
+A3:
+ br label %A4
+
+A4:
+ %A4.phi = phi i32 [%Y, %A3], [%X.inc, %A2] ; expected output feeds the %A2 incoming from a phi in loop.exit.guard1 instead of %X.inc directly
+ callbr void asm "", "r,!i"(i1 %PredA4) to label %C [label %A5] ; fallthrough %C exits the outer loop, indirect target %A5 stays in it
+
+A5:
+ callbr void asm "", "r,!i"(i1 %PredA3) to label %exit [label %A1] ; fallthrough %exit leaves the outer loop, indirect target %A1 is its back edge
+
+C:
+ br label %exit
+
+exit:
+ %exit.phi = phi i32 [%A4.phi, %A5], [%Z, %C] ; expected output replaces the %A5 incoming with a phi in loop.exit.guard
+ ret void
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll b/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll
index 3e68df3e79260..ffe8026a535c0 100644
--- a/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/restore-ssa.ll
@@ -57,6 +57,60 @@ return:
ret i32 %phi
}
+define i32 @exiting-used-in-exit_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @exiting-used-in-exit_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A:%.*]] []
+; CHECK: A:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.return]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP41]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %B.target.C]
+; CHECK: C:
+; CHECK-NEXT: [[INC:%.*]] = add i32 [[MYTMP41_MOVED:%.*]], 1
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: return:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[INC]], [[C:%.*]] ], [ [[PHI_MOVED:%.*]], [[LOOP_EXIT_GUARD:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK: A.target.return:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: B.target.C:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_RETURN:%.*]] ], [ [[MYTMP41]], [[B_TARGET_C:%.*]] ]
+; CHECK-NEXT: [[PHI_MOVED]] = phi i32 [ [[MYTMP42]], [[A_TARGET_RETURN]] ], [ poison, [[B_TARGET_C]] ]
+; CHECK-NEXT: [[GUARD_RETURN:%.*]] = phi i1 [ true, [[A_TARGET_RETURN]] ], [ false, [[B_TARGET_C]] ]
+; CHECK-NEXT: br i1 [[GUARD_RETURN]], label [[RETURN]], label [[C]]
+; Input: loop {A, B}; %mytmp41 is defined in exiting block B and used in exit block C, and %mytmp42 from exiting block A feeds the phi in return.
+entry:
+ callbr void asm "", ""() to label %A [] ; no indirect targets: sole successor is %A
+
+A:
+ %mytmp42 = load i32, ptr %arg1, align 4
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %return] ; fallthrough %B stays in the loop, indirect target %return exits
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ %cmp = icmp slt i32 %mytmp41, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %C] ; fallthrough is the back edge to A, indirect target %C exits
+
+C:
+ %inc = add i32 %mytmp41, 1 ; expected output reads %mytmp41 through a phi in loop.exit.guard instead
+ callbr void asm "", ""() to label %return []
+
+return:
+ %phi = phi i32 [ %inc, %C ], [ %mytmp42, %A ] ; expected output replaces the %A incoming with a phi in loop.exit.guard
+ ret i32 %phi
+}
+
; Loop consists of A, B and C:
; - A is the header
; - A and C are exiting blocks
@@ -112,6 +166,63 @@ return:
ret i32 0
}
+define i32 @internal-used-in-exit_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @internal-used-in-exit_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A:%.*]] []
+; CHECK: A:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.return]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[C:%.*]] []
+; CHECK: C:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.D]
+; CHECK: D:
+; CHECK-NEXT: [[INC:%.*]] = add i32 [[MYTMP41_MOVED:%.*]], 1
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+; CHECK: A.target.return:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_RETURN:%.*]] ], [ [[MYTMP41]], [[C_TARGET_D:%.*]] ]
+; CHECK-NEXT: [[GUARD_RETURN:%.*]] = phi i1 [ true, [[A_TARGET_RETURN]] ], [ false, [[C_TARGET_D]] ]
+; CHECK-NEXT: br i1 [[GUARD_RETURN]], label [[RETURN]], label [[D:%.*]]
+; Input: loop {A, B, C} with exiting blocks A and C; %mytmp41 is defined in B, which has no exiting edge, and used in exit block D.
+entry:
+ %mytmp42 = load i32, ptr %arg1, align 4 ; loaded before the loop, so uses inside it need no extra phi
+ callbr void asm "", ""() to label %A []
+
+A:
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %return] ; fallthrough %B stays in the loop, indirect target %return exits
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ callbr void asm "", ""() to label %C [] ; sole successor is %C
+
+C:
+ %cmp = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %D] ; fallthrough is the back edge to A, indirect target %D exits
+
+D:
+ %inc = add i32 %mytmp41, 1 ; expected output reads %mytmp41 through a phi in loop.exit.guard instead
+ callbr void asm "", ""() to label %return []
+
+return:
+ ret i32 0
+}
+
; Loop consists of A, B and C:
; - A is the header
; - A and C are exiting blocks
@@ -172,6 +283,68 @@ return:
ret i32 %phi
}
+define i32 @mixed-use-in-exit_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @mixed-use-in-exit_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP2]])
+; CHECK-NEXT: to label [[A:%.*]] [label %return]
+; CHECK: A:
+; CHECK-NEXT: [[MYTMP43:%.*]] = add i32 [[MYTMP42]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.return]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[C:%.*]] []
+; CHECK: C:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.D]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: return:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[MYTMP41_MOVED:%.*]], [[D:%.*]] ], [ [[MYTMP42]], [[ENTRY:%.*]] ], [ [[PHI_MOVED:%.*]], [[LOOP_EXIT_GUARD:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK: A.target.return:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_RETURN:%.*]] ], [ [[MYTMP41]], [[C_TARGET_D:%.*]] ]
+; CHECK-NEXT: [[PHI_MOVED]] = phi i32 [ [[MYTMP43]], [[A_TARGET_RETURN]] ], [ poison, [[C_TARGET_D]] ]
+; CHECK-NEXT: [[GUARD_RETURN:%.*]] = phi i1 [ true, [[A_TARGET_RETURN]] ], [ false, [[C_TARGET_D]] ]
+; CHECK-NEXT: br i1 [[GUARD_RETURN]], label [[RETURN]], label [[D]]
+; Input: loop {A, B, C}; return is reached from entry (before the loop), from exiting block A, and from D, so its phi mixes loop and non-loop values.
+entry:
+ %mytmp42 = load i32, ptr %arg1, align 4
+ %cmp2 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp2) to label %A [label %return] ; indirect target bypasses the loop; the expected output leaves this edge untouched
+
+A:
+ %mytmp43 = add i32 %mytmp42, 1
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %return] ; fallthrough %B stays in the loop, indirect target %return exits
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ callbr void asm "", ""() to label %C [] ; sole successor is %C
+
+C:
+ %cmp = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %D] ; fallthrough is the back edge to A, indirect target %D exits
+
+D:
+ callbr void asm "", ""() to label %return []
+
+return:
+ %phi = phi i32 [ %mytmp41, %D ], [ %mytmp43, %A ], [%mytmp42, %entry] ; expected output keeps the %entry incoming and routes the other two through phis in loop.exit.guard
+ ret i32 %phi
+}
+
; Loop consists of A, B and C:
; - A is the header
; - A and C are exiting blocks
@@ -236,3 +409,66 @@ return:
%phi = phi i32 [ %mytmp41, %D ], [ %mytmp42, %E ]
ret i32 %phi
}
+
+define i32 @phi-via-external-block_callbr(ptr %arg1, ptr %arg2) local_unnamed_addr align 2 {
+; CHECK-LABEL: @phi-via-external-block_callbr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MYTMP42:%.*]] = load i32, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[A:%.*]] []
+; CHECK: A:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP1]])
+; CHECK-NEXT: to label [[B:%.*]] [label %A.target.E]
+; CHECK: B:
+; CHECK-NEXT: [[MYTMP41:%.*]] = load i32, ptr [[ARG2:%.*]], align 4
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[C:%.*]] []
+; CHECK: C:
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MYTMP42]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[CMP]])
+; CHECK-NEXT: to label [[A]] [label %C.target.D]
+; CHECK: D:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN:%.*]] []
+; CHECK: E:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label [[RETURN]] []
+; CHECK: return:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[MYTMP41_MOVED:%.*]], [[D:%.*]] ], [ [[MYTMP42]], [[E:%.*]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+; CHECK: A.target.E:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD:%.*]]
+; CHECK: C.target.D:
+; CHECK-NEXT: br label [[LOOP_EXIT_GUARD]]
+; CHECK: loop.exit.guard:
+; CHECK-NEXT: [[MYTMP41_MOVED]] = phi i32 [ poison, [[A_TARGET_E:%.*]] ], [ [[MYTMP41]], [[C_TARGET_D:%.*]] ]
+; CHECK-NEXT: [[GUARD_E:%.*]] = phi i1 [ true, [[A_TARGET_E]] ], [ false, [[C_TARGET_D]] ]
+; CHECK-NEXT: br i1 [[GUARD_E]], label [[E]], label [[D]]
+; Input: loop {A, B, C} with exits E and D; the loop-defined %mytmp41 reaches the phi in return only through the non-loop block D.
+entry:
+ %mytmp42 = load i32, ptr %arg1, align 4 ; loaded before the loop
+ callbr void asm "", ""() to label %A []
+
+A:
+ %cmp1 = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp1) to label %B [label %E] ; fallthrough %B stays in the loop, indirect target %E exits
+
+B:
+ %mytmp41 = load i32, ptr %arg2, align 4
+ callbr void asm "", ""() to label %C [] ; sole successor is %C
+
+C:
+ %cmp = icmp slt i32 %mytmp42, 0
+ callbr void asm "", "r,!i"(i1 %cmp) to label %A [label %D] ; fallthrough is the back edge to A, indirect target %D exits
+
+D:
+ callbr void asm "", ""() to label %return []
+
+E:
+ callbr void asm "", ""() to label %return []
+
+return:
+ %phi = phi i32 [ %mytmp41, %D ], [ %mytmp42, %E ] ; expected output swaps %mytmp41 for a phi in loop.exit.guard; the %E incoming is unchanged
+ ret i32 %phi
+}
diff --git a/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll b/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll
index 05f50fcc37d6e..e65e2549a21c8 100644
--- a/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll
+++ b/llvm/test/Transforms/UnifyLoopExits/undef-phis.ll
@@ -56,3 +56,71 @@ mbb5291: ; preds = %mbb4321
store volatile [2 x i32] %i5293, ptr addrspace(5) null, align 4
ret void
}
+
+define fastcc void @undef_phi_callbr(i64 %i5247, i1 %i4530, i1 %i4936.not) {
+; CHECK-LABEL: define fastcc void @undef_phi_callbr(
+; CHECK-SAME: i64 [[I5247:%.*]], i1 [[I4530:%.*]], i1 [[I4936_NOT:%.*]]) {
+; CHECK-NEXT: [[MBB:.*:]]
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[MBB3932:.*]] []
+; CHECK: [[MBB3932]]:
+; CHECK-NEXT: callbr void asm "", ""()
+; CHECK-NEXT: to label %[[MBB4454:.*]] []
+; CHECK: [[MBB4321:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[I5247]] to i32
+; CHECK-NEXT: [[I5290:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[I5290]])
+; CHECK-NEXT: to label %[[MBB3932]] [label %mbb4321.target.mbb5291]
+; CHECK: [[MBB4454]]:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[I4530]])
+; CHECK-NEXT: to label %[[MBB4535:.*]] [label %mbb4454.target.mbb4454.target.mbb4531]
+; CHECK: [[MBB4531:.*]]:
+; CHECK-NEXT: ret void
+; CHECK: [[MBB4535]]:
+; CHECK-NEXT: callbr void asm "", "r,!i"(i1 [[I4936_NOT]])
+; CHECK-NEXT: to label %[[MBB4535_TARGET_MBB4321:.*]] [label %mbb4454]
+; CHECK: [[MBB5291:.*]]:
+; CHECK-NEXT: [[I5293:%.*]] = insertvalue [2 x i32] zeroinitializer, i32 [[DOTMOVED:%.*]], 1
+; CHECK-NEXT: store volatile [2 x i32] [[I5293]], ptr addrspace(5) null, align 4
+; CHECK-NEXT: ret void
+; CHECK: [[MBB4454_TARGET_MBB4531:.*]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD:.*]]
+; CHECK: [[MBB4321_TARGET_MBB5291:.*]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD]]
+; CHECK: [[LOOP_EXIT_GUARD]]:
+; CHECK-NEXT: [[DOTMOVED]] = phi i32 [ poison, %[[MBB4454_TARGET_MBB4531]] ], [ [[TMP0]], %[[MBB4321_TARGET_MBB5291]] ]
+; CHECK-NEXT: [[GUARD_MBB4531:%.*]] = phi i1 [ true, %[[MBB4454_TARGET_MBB4531]] ], [ false, %[[MBB4321_TARGET_MBB5291]] ]
+; CHECK-NEXT: br i1 [[GUARD_MBB4531]], label %[[MBB4531]], label %[[MBB5291]]
+; CHECK: [[MBB4454_TARGET_MBB4454_TARGET_MBB4531:.*]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD1:.*]]
+; CHECK: [[MBB4535_TARGET_MBB4321]]:
+; CHECK-NEXT: br label %[[LOOP_EXIT_GUARD1]]
+; CHECK: [[LOOP_EXIT_GUARD1]]:
+; CHECK-NEXT: [[GUARD_MBB4454_TARGET_MBB4531:%.*]] = phi i1 [ true, %[[MBB4454_TARGET_MBB4454_TARGET_MBB4531]] ], [ false, %[[MBB4535_TARGET_MBB4321]] ]
+; CHECK-NEXT: br i1 [[GUARD_MBB4454_TARGET_MBB4531]], label %[[MBB4454_TARGET_MBB4531]], label %[[MBB4321]]
+;
+mbb:
+ callbr void asm "", ""() to label %mbb3932 []
+
+mbb3932: ; preds = %mbb4321, %mbb
+ callbr void asm "", ""() to label %mbb4454 []
+
+mbb4321: ; preds = %mbb4535
+ %0 = trunc i64 %i5247 to i32
+ %i5290 = icmp eq i32 %0, 0
+ callbr void asm "", "r,!i"(i1 %i5290) to label %mbb3932 [label %mbb5291]
+
+mbb4454: ; preds = %mbb4535, %mbb3932
+ callbr void asm "", "r,!i"(i1 %i4530) to label %mbb4535 [label %mbb4531]
+
+mbb4531: ; preds = %mbb4454
+ ret void
+
+mbb4535: ; preds = %mbb4454
+ callbr void asm "", "r,!i"(i1 %i4936.not) to label %mbb4321 [label %mbb4454]
+
+mbb5291: ; preds = %mbb4321
+ %i5293 = insertvalue [2 x i32] zeroinitializer, i32 %0, 1
+ store volatile [2 x i32] %i5293, ptr addrspace(5) null, align 4
+ ret void
+}
>From 32168ed9df8f98550c29e78ffacd063c4131d040 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 18 Jul 2025 14:44:59 +0200
Subject: [PATCH 2/6] Apply suggestions from code review
Co-authored-by: Matt Arsenault <Matthew.Arsenault at amd.com>
---
llvm/lib/Transforms/Utils/ControlFlowUtils.cpp | 2 +-
llvm/lib/Transforms/Utils/FixIrreducible.cpp | 16 +++++++---------
llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 8 +++-----
llvm/test/Transforms/FixIrreducible/bug45623.ll | 2 +-
4 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index f7197a68813dd..ce57a96d2ea7c 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -360,7 +360,7 @@ BasicBlock *ControlFlowHub::createCallBrTarget(
// Jump from the new target block to the original successor.
BranchInst::Create(Succ, CallBrTarget);
if (LI) {
- if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ); L) {
+ if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ)) {
bool AddToLoop = true;
if (AttachToCallBr) {
// Check if the loops are disjoint. In that case, we do not add the
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index ade23f942352d..eaed81c81b6f6 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -290,7 +290,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator())) {
// Exactly one of the two successors is the header.
BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
BasicBlock *Succ1 = Succ0 ? nullptr : Header;
@@ -302,20 +302,19 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
<< (Succ0 ? Succ0->getName() : "") << " "
<< (Succ1 ? Succ1->getName() : "") << "\n");
- } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
- CallBr) {
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
BasicBlock *Succ = CallBr->getSuccessor(I);
if (Succ != Header)
continue;
- BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
CallBr, Succ, I, false, &CI, &DTU, LI);
CHub.addBranch(NewSucc, Succ);
LLVM_DEBUG(dbgs() << "Added internal branch: " << NewSucc->getName()
<< " -> " << Succ->getName() << "\n");
}
} else {
- llvm_unreachable("Unsupported block terminator.");
+ llvm_unreachable("unsupported block terminator");
}
}
@@ -340,20 +339,19 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
<< (Succ0 ? Succ0->getName() : "") << " "
<< (Succ1 ? Succ1->getName() : "") << "\n");
- } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
- CallBr) {
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
BasicBlock *Succ = CallBr->getSuccessor(I);
if (!C.contains(Succ))
continue;
- BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
CallBr, Succ, I, true, &CI, &DTU, LI);
CHub.addBranch(NewSucc, Succ);
LLVM_DEBUG(dbgs() << "Added external branch: " << NewSucc->getName()
<< " -> " << Succ->getName() << "\n");
}
} else {
- llvm_unreachable("Unsupported block terminator.");
+ llvm_unreachable("unsupported block terminator");
}
}
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 51e5aaa5225e1..2402763762303 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -161,8 +161,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
BasicBlock *BB = ExitingBlocks[I];
- if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
- Branch) {
+ if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator())) {
BasicBlock *Succ0 = Branch->getSuccessor(0);
Succ0 = L->contains(Succ0) ? nullptr : Succ0;
@@ -174,8 +173,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
<< (Succ0 ? Succ0->getName() : "<none>") << ", "
<< (Succ1 ? Succ1->getName() : "<none>") << "}\n");
- } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator());
- CallBr) {
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator())) {
for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
BasicBlock *Succ = CallBr->getSuccessor(J);
if (L->contains(Succ))
@@ -196,7 +194,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
CallBrTargetBlocks.push_back(NewSucc);
}
} else {
- llvm_unreachable("Unsupported block terminator.");
+ llvm_unreachable("unsupported block terminator");
}
}
diff --git a/llvm/test/Transforms/FixIrreducible/bug45623.ll b/llvm/test/Transforms/FixIrreducible/bug45623.ll
index 57ebf0c47e515..b6dd6fb9e6fcb 100644
--- a/llvm/test/Transforms/FixIrreducible/bug45623.ll
+++ b/llvm/test/Transforms/FixIrreducible/bug45623.ll
@@ -91,7 +91,7 @@ if.else629: ; preds = %backtrack
br label %retry
}
-define dso_local void @tre_tnfa_run_backtrack_callbr(i1 %arg) {
+define void @tre_tnfa_run_backtrack_callbr(i1 %arg) {
; CHECK-LABEL: @tre_tnfa_run_backtrack_callbr(
; CHECK-NEXT: entry:
; CHECK-NEXT: callbr void asm "", ""()
>From bba4092a3de356e046ec8cef47a98e9fb7e47b16 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 18 Jul 2025 08:31:14 -0500
Subject: [PATCH 3/6] implement feedback part 2
---
.../llvm/Transforms/Utils/BasicBlockUtils.h | 3 ++
.../llvm/Transforms/Utils/ControlFlowUtils.h | 49 ++++++++++---------
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 9 +++-
.../lib/Transforms/Utils/ControlFlowUtils.cpp | 2 +-
llvm/lib/Transforms/Utils/FixIrreducible.cpp | 20 ++++----
llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 10 ++--
llvm/test/Transforms/FixIrreducible/callbr.ll | 7 ++-
7 files changed, 55 insertions(+), 45 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index fc7b313eb552a..caf3bf54177ed 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -21,6 +21,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Printable.h"
#include <cassert>
namespace llvm {
@@ -618,6 +619,8 @@ LLVM_ABI bool hasOnlySimpleTerminator(const Function &F);
// (br/brcond/unreachable/ret) or callbr.
LLVM_ABI bool hasOnlySimpleTerminatorOrCallBr(const Function &F);
+LLVM_ABI Printable printBBPtr(const BasicBlock *BB);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
index e55efbc907d42..2dd2efcc49a1d 100644
--- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
@@ -123,30 +123,31 @@ struct ControlFlowHub {
SmallVector<BranchDescriptor> Branches;
- /**
- * \brief Create a new intermediate target block for a callbr edge.
- *
- * This function creates a new basic block (the "target block") that sits
- * between a callbr instruction and one of its successors. The callbr's
- * successor is rewired to this new block, and the new block unconditionally
- * branches to the original successor. This is useful for normalizing control
- * flow, e.g., when transforming irreducible loops.
- *
- * \param CallBr The callbr instruction whose edge is to be split.
- * \param Succ The original successor basic block to be reached.
- * \param SuccIdx The index of the successor in the callbr instruction.
- * \param AttachToCallBr If true, the new block is associated with the
- * callbr's parent for loop/cycle info. If false, the new block is associated
- * with the callbr's successor for loop/cycle info. \param CI Optional
- * CycleInfo for updating cycle membership. \param DTU Optional
- * DomTreeUpdater for updating the dominator tree. \param LI Optional LoopInfo
- * for updating loop membership.
- *
- * \returns The newly created intermediate target block.
- *
- * \note This function updates PHI nodes, dominator tree, loop info, and cycle
- * info as needed.
- */
+ /// \brief Create a new intermediate target block for a callbr edge.
+ ///
+ /// This function creates a new basic block (the "target block") that sits
+ /// between a callbr instruction and one of its successors. The callbr's
+ /// successor is rewired to this new block, and the new block unconditionally
+ /// branches to the original successor. This is useful for normalizing control
+ /// flow, e.g., when transforming irreducible loops.
+ ///
+ /// \param CallBr The callbr instruction whose edge is to be split.
+ /// \param Succ The original successor basic block to be reached.
+ /// \param SuccIdx The index of the successor in the callbr
+ /// instruction.
+ /// \param AttachToCallBr If true, the new block is associated with the
+ /// callbr's parent for loop/cycle info.
+ /// If false, the new block is associated with the
+ /// callbr's successor for loop/cycle info.
+ /// \param CI Optional CycleInfo for updating cycle membership.
+ /// \param DTU Optional DomTreeUpdater for updating the dominator
+ /// tree.
+ /// \param LI Optional LoopInfo for updating loop membership.
+ ///
+ /// \returns The newly created intermediate target block.
+ ///
+ /// \note This function updates PHI nodes, dominator tree, loop info, and
+ /// cycle info as needed.
static BasicBlock *
createCallBrTarget(CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx,
bool AttachToCallBr = true, CycleInfo *CI = nullptr,
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6103d07212fc2..d8404794913f4 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1783,4 +1783,11 @@ bool llvm::hasOnlySimpleTerminator(const Function &F) {
bool llvm::hasOnlySimpleTerminatorOrCallBr(const Function &F) {
return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst,
CallBrInst>(F);
-}
\ No newline at end of file
+}
+
+Printable llvm::printBBPtr(const BasicBlock *BB) {
+ return Printable([BB](raw_ostream &OS) {
+ if (BB)
+ return BB->printAsOperand(OS);
+ });
+}
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index ce57a96d2ea7c..688247fa35e96 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -396,4 +396,4 @@ BasicBlock *ControlFlowHub::createCallBrTarget(
{DominatorTree::Insert, CallBrTarget, Succ}});
}
return CallBrTarget;
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index eaed81c81b6f6..a22bbfbf87a4e 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -299,9 +299,9 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
assert(Succ0 || Succ1);
CHub.addBranch(P, Succ0, Succ1);
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << printBBPtr(P) << " -> "
+ << printBBPtr(Succ0) << (Succ0 && Succ1 ? " " : "")
+ << printBBPtr(Succ1) << "\n");
} else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
BasicBlock *Succ = CallBr->getSuccessor(I);
@@ -310,8 +310,8 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
CallBr, Succ, I, false, &CI, &DTU, LI);
CHub.addBranch(NewSucc, Succ);
- LLVM_DEBUG(dbgs() << "Added internal branch: " << NewSucc->getName()
- << " -> " << Succ->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << printBBPtr(NewSucc)
+ << " -> " << printBBPtr(Succ) << "\n");
}
} else {
llvm_unreachable("unsupported block terminator");
@@ -336,9 +336,9 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
CHub.addBranch(P, Succ0, Succ1);
- LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ LLVM_DEBUG(dbgs() << "Added external branch: " << printBBPtr(P) << " -> "
+ << printBBPtr(Succ0) << (Succ0 && Succ1 ? " " : "")
+ << printBBPtr(Succ1) << "\n");
} else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator())) {
for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
BasicBlock *Succ = CallBr->getSuccessor(I);
@@ -347,8 +347,8 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
CallBr, Succ, I, true, &CI, &DTU, LI);
CHub.addBranch(NewSucc, Succ);
- LLVM_DEBUG(dbgs() << "Added external branch: " << NewSucc->getName()
- << " -> " << Succ->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Added external branch: " << printBBPtr(NewSucc)
+ << " -> " << printBBPtr(Succ) << "\n");
}
} else {
llvm_unreachable("unsupported block terminator");
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 2402763762303..9007f4780d5ae 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -170,9 +170,9 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
Succ1 = L->contains(Succ1) ? nullptr : Succ1;
CHub.addBranch(BB, Succ0, Succ1);
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
- << (Succ0 ? Succ0->getName() : "<none>") << ", "
- << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << printBBPtr(BB) << " -> "
+ << printBBPtr(Succ0) << (Succ0 && Succ1 ? " " : "")
+ << printBBPtr(Succ1) << "\n");
} else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator())) {
for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
BasicBlock *Succ = CallBr->getSuccessor(J);
@@ -187,8 +187,8 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
// themselves.
ExitingBlocks[I] = NewSucc;
CHub.addBranch(NewSucc, Succ);
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << NewSucc->getName()
- << " -> " << Succ->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << printBBPtr(NewSucc)
+ << " -> " << printBBPtr(Succ) << "\n");
// Also add the new target block to the list of exiting blocks that
// should later be added to the parent loops.
CallBrTargetBlocks.push_back(NewSucc);
diff --git a/llvm/test/Transforms/FixIrreducible/callbr.ll b/llvm/test/Transforms/FixIrreducible/callbr.ll
index 7386b1000889b..49a53bf6e603b 100644
--- a/llvm/test/Transforms/FixIrreducible/callbr.ll
+++ b/llvm/test/Transforms/FixIrreducible/callbr.ll
@@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -fix-irreducible --verify-loop-info -S | FileCheck %s
; RUN: opt < %s -passes='fix-irreducible,verify<loops>' -S | FileCheck %s
; RUN: opt < %s -passes='verify<loops>,fix-irreducible,verify<loops>' -S | FileCheck %s
; RUN: opt < %s -passes='print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefix LOOPS-BEFORE
@@ -9,9 +8,9 @@
; LOOPS-AFTER: Loop info for function 'callbr_entry':{{$}}
; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%indirect,%fallthrough<latch><exiting>{{$}}
-; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_entry_targets_with_phi_nodes':
-; LOOPS-AFTER-NEXT: Loop info for function 'callbr_entry_targets_with_phi_nodes':
-; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%block1<exiting>,%block<latch>
+; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_entry_targets_with_phi_nodes':{{$}}
+; LOOPS-AFTER-NEXT: Loop info for function 'callbr_entry_targets_with_phi_nodes':{{$}}
+; LOOPS-AFTER-NEXT: Loop at depth 1 containing: %irr.guard<header>,%block1<exiting>,%block<latch>{{$}}
; LOOPS-BEFORE-NEXT: Loop info for function 'callbr_entry_multiple_indirect_targets':{{$}}
; LOOPS-AFTER-NEXT: Loop info for function 'callbr_entry_multiple_indirect_targets':{{$}}
>From 4800a5fb53e9e0531945aac331f48f4cc3a7c879 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Mon, 21 Jul 2025 04:30:15 -0500
Subject: [PATCH 4/6] implement feedback
---
.../llvm/Transforms/Utils/BasicBlockUtils.h | 9 +--
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 15 +---
.../lib/Transforms/Utils/ControlFlowUtils.cpp | 81 ++++++++++++-------
llvm/lib/Transforms/Utils/FixIrreducible.cpp | 6 +-
llvm/lib/Transforms/Utils/UnifyLoopExits.cpp | 6 +-
5 files changed, 59 insertions(+), 58 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index caf3bf54177ed..4452be4cb3d62 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -608,17 +608,10 @@ LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F,
// successors
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
-template <typename... TermInst>
-LLVM_ABI bool hasOnlyGivenTerminators(const Function &F);
-
-// Check whether the function only has blocks with simple terminators:
+// Check whether the function only has simple terminator:
// br/brcond/unreachable/ret
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F);
-// Check whether the function only has blocks with simple terminators
-// (br/brcond/unreachable/ret) or callbr.
-LLVM_ABI bool hasOnlySimpleTerminatorOrCallBr(const Function &F);
-
LLVM_ABI Printable printBBPtr(const BasicBlock *BB);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index d8404794913f4..4b17e8577acd7 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1766,25 +1766,16 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->swapSuccessors();
}
-template <typename... TermInst>
-bool llvm::hasOnlyGivenTerminators(const Function &F) {
+bool llvm::hasOnlySimpleTerminator(const Function &F) {
for (auto &BB : F) {
auto *Term = BB.getTerminator();
- if (!(isa<TermInst>(Term) || ...))
+ if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
+ isa<BranchInst>(Term)))
return false;
}
return true;
}
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
- return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst>(F);
-}
-
-bool llvm::hasOnlySimpleTerminatorOrCallBr(const Function &F) {
- return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst,
- CallBrInst>(F);
-}
-
Printable llvm::printBBPtr(const BasicBlock *BB) {
return Printable([BB](raw_ostream &OS) {
if (BB)
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index 688247fa35e96..a84b5f8843a5c 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -346,6 +346,53 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
return {FirstGuardBlock, true};
}
+// Check if the given cycle is disjoint with the cycle of the given basic block.
+// If the basic block does not belong to any cycle, this is regarded as
+// disjoint, too.
+static bool disjointWithBlock(CycleInfo *CI, Cycle *C, BasicBlock *BB) {
+ Cycle *CC = CI->getCycle(BB);
+ if (!CC)
+ return true;
+ Cycle *CommonC = CI->getSmallestCommonCycle(C, CC);
+ return CommonC != C && CommonC != CC;
+}
+
+// Check if the given loop is disjoint with the loop of the given basic block.
+// If the basic block does not belong to any loop, the loops are not regarded
+// as disjoint.
+static bool disjointWithBlock(LoopInfo *LI, Loop *L, BasicBlock *BB) {
+ Loop *LL = LI->getLoopFor(BB);
+ return LL && !L->contains(LL) && !LL->contains(L);
+}
+
+template <typename TI, typename T>
+static void updateCycleLoopInfo(TI *LCI, bool AttachToCallBr,
+ BasicBlock *CallBrBlock,
+ BasicBlock *CallBrTarget, BasicBlock *Succ) {
+ static_assert(std::is_same_v<TI, CycleInfo> || std::is_same_v<TI, LoopInfo>,
+ "type must be CycleInfo or LoopInfo");
+ if (!LCI)
+ return;
+
+ T *LC;
+ if constexpr (std::is_same_v<TI, CycleInfo>)
+ LC = LCI->getCycle(AttachToCallBr ? CallBrBlock : Succ);
+ else
+ LC = LCI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ);
+ if (!LC)
+ return;
+
+ // Check if the cycles/loops are disjoint. In that case, we do not add the
+ // intermediate target to any cycle/loop.
+ if (AttachToCallBr && disjointWithBlock(LCI, LC, Succ))
+ return;
+
+ if constexpr (std::is_same_v<TI, CycleInfo>)
+ LCI->addBlockToCycle(CallBrTarget, LC);
+ else
+ LC->addBasicBlockToLoop(CallBrTarget, *LCI);
+}
+
BasicBlock *ControlFlowHub::createCallBrTarget(
CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx, bool AttachToCallBr,
CycleInfo *CI, DomTreeUpdater *DTU, LoopInfo *LI) {
@@ -359,36 +406,10 @@ BasicBlock *ControlFlowHub::createCallBrTarget(
CallBr->setSuccessor(SuccIdx, CallBrTarget);
// Jump from the new target block to the original successor.
BranchInst::Create(Succ, CallBrTarget);
- if (LI) {
- if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ)) {
- bool AddToLoop = true;
- if (AttachToCallBr) {
- // Check if the loops are disjoint. In that case, we do not add the
- // intermediate target to any loop.
- if (auto *LL = LI->getLoopFor(Succ);
- LL && !L->contains(LL) && !LL->contains(L))
- AddToLoop = false;
- }
- if (AddToLoop)
- L->addBasicBlockToLoop(CallBrTarget, *LI);
- }
- }
- if (CI) {
- if (auto *C = CI->getCycle(AttachToCallBr ? CallBrBlock : Succ); C) {
- bool AddToCycle = true;
- if (AttachToCallBr) {
- // Check if the cycles are disjoint. In that case, we do not add the
- // intermediate target to any cycle.
- if (auto *CC = CI->getCycle(Succ); CC) {
- auto *CommonC = CI->getSmallestCommonCycle(C, CC);
- if (CommonC != C && CommonC != CC)
- AddToCycle = false;
- }
- }
- if (AddToCycle)
- CI->addBlockToCycle(CallBrTarget, C);
- }
- }
+ updateCycleLoopInfo<LoopInfo, Loop>(LI, AttachToCallBr, CallBrBlock,
+ CallBrTarget, Succ);
+ updateCycleLoopInfo<CycleInfo, Cycle>(CI, AttachToCallBr, CallBrBlock,
+ CallBrTarget, Succ);
if (DTU) {
DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
if (DTU->getDomTree().dominates(CallBrBlock, Succ))
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index a22bbfbf87a4e..3220d836f38f2 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -308,7 +308,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
if (Succ != Header)
continue;
BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
- CallBr, Succ, I, false, &CI, &DTU, LI);
+ CallBr, Succ, I, /* AttachToCallBr = */ false, &CI, &DTU, LI);
CHub.addBranch(NewSucc, Succ);
LLVM_DEBUG(dbgs() << "Added internal branch: " << printBBPtr(NewSucc)
<< " -> " << printBBPtr(Succ) << "\n");
@@ -345,7 +345,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
if (!C.contains(Succ))
continue;
BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
- CallBr, Succ, I, true, &CI, &DTU, LI);
+ CallBr, Succ, I, /* AttachToCallBr = */ true, &CI, &DTU, LI);
CHub.addBranch(NewSucc, Succ);
LLVM_DEBUG(dbgs() << "Added external branch: " << printBBPtr(NewSucc)
<< " -> " << printBBPtr(Succ) << "\n");
@@ -400,8 +400,6 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
-
bool Changed = false;
for (Cycle *TopCycle : CI.toplevel_cycles()) {
for (Cycle *C : depth_first(TopCycle)) {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 9007f4780d5ae..c5d8d4b48f026 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -179,7 +179,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
if (L->contains(Succ))
continue;
BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
- CallBr, Succ, J, false, nullptr, &DTU, &LI);
+ CallBr, Succ, J, /* AttachToCallBr = */ false, nullptr, &DTU, &LI);
// ExitingBlocks is later used to restore SSA, so we need to make sure
// that the blocks used for phi nodes in the guard blocks match the
// predecessors of the guard blocks, which, in the case of callbr, are
@@ -219,7 +219,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
// members of the parent loop. Same goes for the callbr target blocks if they
// have not already been added to the respective parent loop by adding them to
// the original callbr target's loop.
- if (auto ParentLoop = L->getParentLoop()) {
+ if (auto *ParentLoop = L->getParentLoop()) {
for (auto *G : GuardBlocks) {
ParentLoop->addBasicBlockToLoop(G, LI);
}
@@ -254,8 +254,6 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
-
return runImpl(LI, DT);
}
>From 47408c30af9b0dd6b4c791130aad4a91efee8949 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 5 Aug 2025 10:24:07 -0500
Subject: [PATCH 5/6] [AMDGPU][UnifyDivergentExitNodes][StructurizeCFG] Add
support for callbr instruction with basic inline-asm
Finishes adding basic inline-asm callbr support for AMDGPU, started by
https://github.com/llvm/llvm-project/pull/149308.
---
.../AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 89 +++---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 +-
llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 15 +-
llvm/test/CodeGen/AMDGPU/callbr.ll | 54 ++++
...nify-divergent-exit-nodes-with-musttail.ll | 51 ++++
llvm/test/CodeGen/AMDGPU/infinite-loop.ll | 258 ++++++++++++++++--
.../si-annotate-nested-control-flows.ll | 100 ++++++-
.../si-unify-exit-multiple-unreachables.ll | 161 ++++++++++-
llvm/test/CodeGen/AMDGPU/update-phi.ll | 39 +++
llvm/test/Transforms/StructurizeCFG/callbr.ll | 56 ++++
10 files changed, 751 insertions(+), 82 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/callbr.ll
create mode 100644 llvm/test/Transforms/StructurizeCFG/callbr.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 733c5d520fb23..2df6bbc74f6da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
return NewRetBlock;
}
+static BasicBlock *
+createDummyReturnBlock(Function &F,
+ SmallVector<BasicBlock *, 4> &ReturningBlocks) { // Adds a return-only block to F.
+ BasicBlock *DummyReturnBB =
+ BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
+ Type *RetTy = F.getReturnType();
+ Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); // void: no operand; otherwise poison
+ ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
+ ReturningBlocks.push_back(DummyReturnBB); // the caller's list also gets the new block
+ return DummyReturnBB;
+}
+
+/// Move terminator \p BI (conditional branch -> 2 targets, or callbr -> N
+/// targets) of \p BB into a new block and add a dummy \p DummyReturnBB edge.
+static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
+ BasicBlock *DummyReturnBB,
+ std::vector<DominatorTree::UpdateType> &Updates) {
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
+
+ // Create a new transition block to hold the branch or callbr terminator.
+ BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
+
+ Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
+
+ // Record the CFG changes: 'Successors' become successors of TransitionBB
+ // instead of BB, and TransitionBB becomes a successor of BB.
+ Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
+ for (BasicBlock *Successor : Successors) {
+ Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
+ Updates.emplace_back(DominatorTree::Delete, BB, Successor);
+ }
+
+ // Replace BB's terminator with a conditional branch on constant true: it
+ // always goes to the transition block but also references DummyReturnBB.
+ BB->getTerminator()->eraseFromParent();
+ BranchInst::Create(TransitionBB, DummyReturnBB,
+ ConstantInt::getTrue(F.getContext()), BB);
+ Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+}
+
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
- !isa<BranchInst>(PDT.getRoot()->getTerminator())))
+ !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
return false;
// Loop over all of the blocks in a function, tracking all of the blocks that
@@ -222,46 +260,27 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-
- ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
- if (DummyReturnBB == nullptr) {
- DummyReturnBB = BasicBlock::Create(F.getContext(),
- "DummyReturnBlock", &F);
- Type *RetTy = F.getReturnType();
- Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
- ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
- ReturningBlocks.push_back(DummyReturnBB);
- }
+ if (DummyReturnBB == nullptr)
+ DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
if (BI->isUnconditional()) {
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
- BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
- Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
- } else { // Conditional branch.
- SmallVector<BasicBlock *, 2> Successors(successors(BB));
-
- // Create a new transition block to hold the conditional branch.
- BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
-
- Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
-
- // 'Successors' become successors of TransitionBB instead of BB,
- // and TransitionBB becomes a single successor of BB.
- Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
- for (BasicBlock *Successor : Successors) {
- Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
- Updates.emplace_back(DominatorTree::Delete, BB, Successor);
- }
-
- // Create a branch that will always branch to the transition block and
- // references DummyReturnBB.
- BB->getTerminator()->eraseFromParent();
- BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
+ BranchInst::Create(LoopHeaderBB, DummyReturnBB,
+ ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+ } else {
+ handleNBranch(F, BB, BI, DummyReturnBB, Updates);
}
Changed = true;
+ } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
+ if (DummyReturnBB == nullptr)
+ DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
+
+ handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
+ } else {
+ llvm_unreachable("unsupported block terminator");
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index dfe6f65d240e6..5f09cfbc6d817 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16476,12 +16476,12 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
- const unsigned BitWidth = VT.getSizeInBits();
switch (Constraint[0]) {
default:
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
case 's':
- case 'r':
+ case 'r': {
+ const unsigned BitWidth = VT.getSizeInBits();
switch (BitWidth) {
case 16:
RC = &AMDGPU::SReg_32RegClass;
@@ -16496,7 +16496,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
break;
}
break;
- case 'v':
+ }
+ case 'v': {
+ const unsigned BitWidth = VT.getSizeInBits();
switch (BitWidth) {
case 16:
RC = Subtarget->useRealTrue16Insts() ? &AMDGPU::VGPR_16RegClass
@@ -16509,9 +16511,11 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
break;
}
break;
+ }
case 'a':
if (!Subtarget->hasMAIInsts())
break;
+ const unsigned BitWidth = VT.getSizeInBits();
switch (BitWidth) {
case 16:
RC = &AMDGPU::AGPR_32RegClass;
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index a69d64956d6d9..d43ed8a9364b8 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -480,11 +480,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
- BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
- for (BasicBlock *Succ : Term->successors())
- if (Visited.count(Succ))
- Loops[Succ] = BB;
+ if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
}
}
@@ -516,7 +515,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
- if (!ParentRegion->contains(P))
+ if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
continue;
Region *R = RI->getRegionFor(P);
@@ -1284,13 +1283,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
- if (R->isTopLevelRegion())
+ // CallBr and its corresponding blocks must not be modified by this pass.
+ if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
return false;
this->DT = DT;
Func = R->getEntry()->getParent();
- assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
ParentRegion = R;
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
new file mode 100644
index 0000000000000..253a6ec100eae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
+
+define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) {
+; CHECK-LABEL: callbr_inline_asm:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v0, v[0:1]
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ; %bb.1: ; %fallthrough
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_store_dword v[2:3], v0
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target
+; CHECK-NEXT: ; %indirect
+; CHECK-NEXT: ; Label of block must be emitted
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_store_dword v[4:5], v0
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %a = load i32, ptr %src, align 4
+ callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect]
+fallthrough:
+ store i32 %a, ptr %dst1, align 4
+ br label %ret
+indirect:
+ store i32 %a, ptr %dst2, align 4
+ br label %ret
+ret:
+ ret void
+}
+
+define void @callbr_self_loop(i1 %c) {
+; CHECK-LABEL: callbr_self_loop:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: .LBB1_1: ; %callbr
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_branch .LBB1_1
+; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target
+; CHECK-NEXT: ; %callbr.target.ret
+; CHECK-NEXT: ; Label of block must be emitted
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ br label %callbr
+callbr:
+ callbr void asm "", "!i"() to label %callbr [label %ret]
+ret:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
index 007e3f0a6bdbc..076a99ff8588f 100644
--- a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
+++ b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
@@ -3,6 +3,7 @@
declare void @foo(ptr)
declare i1 @bar(ptr)
+declare i32 @bar32(ptr)
define void @musttail_call_without_return_value(ptr %p) {
; CHECK-LABEL: define void @musttail_call_without_return_value(
@@ -28,6 +29,31 @@ bb.1:
ret void
}
+define void @musttail_call_without_return_value_callbr(ptr %p) {
+; CHECK-LABEL: define void @musttail_call_without_return_value_callbr(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
+; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
+; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
+; CHECK: [[BB_0]]:
+; CHECK-NEXT: musttail call void @foo(ptr [[P]])
+; CHECK-NEXT: ret void
+; CHECK: [[BB_1:.*:]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %load = load i32, ptr %p, align 1
+ callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
+
+bb.0:
+ musttail call void @foo(ptr %p)
+ ret void
+
+bb.1:
+ ret void
+}
+
define i1 @musttail_call_with_return_value(ptr %p) {
; CHECK-LABEL: define i1 @musttail_call_with_return_value(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
@@ -51,3 +77,28 @@ bb.0:
bb.1:
ret i1 %load
}
+
+define i32 @musttail_call_with_return_value_callbr(ptr %p) {
+; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
+; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
+; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
+; CHECK: [[BB_0]]:
+; CHECK-NEXT: [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]])
+; CHECK-NEXT: ret i32 [[RET]]
+; CHECK: [[BB_1:.*:]]
+; CHECK-NEXT: ret i32 [[LOAD]]
+;
+entry:
+ %load = load i32, ptr %p, align 1
+ callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
+
+bb.0:
+ %ret = musttail call i32 @bar32(ptr %p)
+ ret i32 %ret
+
+bb.1:
+ ret i32 %load
+}
diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
index bea532bd52955..2181c77f0e6ef 100644
--- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -36,26 +36,60 @@ loop:
br label %loop
}
+define amdgpu_kernel void @infinite_loop_callbr(ptr addrspace(1) %out) {
+; SI-LABEL: infinite_loop_callbr:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: ;;#ASMSTART
+; SI-NEXT: ;;#ASMEND
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: s_endpgm
+; IR-LABEL: @infinite_loop_callbr(
+; IR-NEXT: entry:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[LOOP:%.*]] []
+; IR: loop:
+; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]]
+; IR: TransitionBlock:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[LOOP]] []
+; IR: DummyReturnBlock:
+; IR-NEXT: ret void
+;
+entry:
+ callbr void asm "", ""() to label %loop []
+
+loop:
+ store volatile i32 999, ptr addrspace(1) %out, align 4
+ callbr void asm "", ""() to label %loop []
+}
+
define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_ret:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; SI-NEXT: s_cbranch_execz .LBB1_3
+; SI-NEXT: s_cbranch_execz .LBB2_3
; SI-NEXT: ; %bb.1: ; %loop.preheader
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 vcc, exec, -1
-; SI-NEXT: .LBB1_2: ; %loop
+; SI-NEXT: .LBB2_2: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
-; SI-NEXT: s_cbranch_vccnz .LBB1_2
-; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
+; SI-NEXT: s_cbranch_vccnz .LBB2_2
+; SI-NEXT: .LBB2_3: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_ret(
; IR-NEXT: entry:
@@ -81,44 +115,93 @@ return:
ret void
}
+define amdgpu_kernel void @infinite_loop_ret_callbr(ptr addrspace(1) %out) {
+; SI-LABEL: infinite_loop_ret_callbr:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: ;;#ASMSTART
+; SI-NEXT: ;;#ASMEND
+; SI-NEXT: ; %bb.1: ; %loop.preheader
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: .LBB3_2: ; Inline asm indirect target
+; SI-NEXT: ; %UnifiedReturnBlock
+; SI-NEXT: ; Label of block must be emitted
+; SI-NEXT: s_endpgm
+; IR-LABEL: @infinite_loop_ret_callbr(
+; IR-NEXT: entry:
+; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1
+; IR-NEXT: [[COND32:%.*]] = zext i1 [[COND]] to i32
+; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND32]])
+; IR-NEXT: to label [[LOOP:%.*]] [label %UnifiedReturnBlock]
+; IR: loop:
+; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
+; IR: TransitionBlock:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[LOOP]] []
+; IR: UnifiedReturnBlock:
+; IR-NEXT: ret void
+;
+entry:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %cond = icmp eq i32 %tmp, 1
+ %cond32 = zext i1 %cond to i32
+ callbr void asm "", "r,!i"(i32 %cond32) to label %loop [label %return]
+
+loop:
+ store volatile i32 999, ptr addrspace(1) %out, align 4
+ callbr void asm "", ""() to label %loop []
+
+return:
+ ret void
+}
+
define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loops:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b64 s[2:3], -1
-; SI-NEXT: s_cbranch_scc1 .LBB2_4
+; SI-NEXT: s_cbranch_scc1 .LBB4_4
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x378
; SI-NEXT: s_and_b64 vcc, exec, -1
-; SI-NEXT: .LBB2_2: ; %loop2
+; SI-NEXT: .LBB4_2: ; %loop2
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
-; SI-NEXT: s_cbranch_vccnz .LBB2_2
+; SI-NEXT: s_cbranch_vccnz .LBB4_2
; SI-NEXT: ; %bb.3: ; %Flow
; SI-NEXT: s_mov_b64 s[2:3], 0
-; SI-NEXT: .LBB2_4: ; %Flow2
+; SI-NEXT: .LBB4_4: ; %Flow2
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
-; SI-NEXT: s_cbranch_vccz .LBB2_7
+; SI-NEXT: s_cbranch_vccz .LBB4_7
; SI-NEXT: ; %bb.5:
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 vcc, exec, 0
-; SI-NEXT: .LBB2_6: ; %loop1
+; SI-NEXT: .LBB4_6: ; %loop1
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
-; SI-NEXT: s_cbranch_vccz .LBB2_6
-; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
+; SI-NEXT: s_cbranch_vccz .LBB4_6
+; SI-NEXT: .LBB4_7: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loops(
; IR-NEXT: entry:
@@ -144,24 +227,78 @@ loop2:
br label %loop2
}
+define amdgpu_kernel void @infinite_loops_callbr(ptr addrspace(1) %out) {
+; SI-LABEL: infinite_loops_callbr:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: ;;#ASMSTART
+; SI-NEXT: ;;#ASMEND
+; SI-NEXT: ; %bb.1: ; %loop1
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: s_endpgm
+; SI-NEXT: .LBB5_2: ; Inline asm indirect target
+; SI-NEXT: ; %loop2.preheader
+; SI-NEXT: ; Label of block must be emitted
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x378
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: s_endpgm
+; IR-LABEL: @infinite_loops_callbr(
+; IR-NEXT: entry:
+; IR-NEXT: callbr void asm "", "r,!i"(i32 poison)
+; IR-NEXT: to label [[LOOP1:%.*]] [label %loop2]
+; IR: loop1:
+; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]]
+; IR: TransitionBlock:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[LOOP1]] []
+; IR: loop2:
+; IR-NEXT: store volatile i32 888, ptr addrspace(1) [[OUT]], align 4
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK1:%.*]], label [[DUMMYRETURNBLOCK]]
+; IR: TransitionBlock1:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[LOOP2:%.*]] []
+; IR: DummyReturnBlock:
+; IR-NEXT: ret void
+;
+entry:
+ callbr void asm "", "r,!i"(i32 poison) to label %loop1 [label %loop2]
+
+loop1:
+ store volatile i32 999, ptr addrspace(1) %out, align 4
+ callbr void asm "", ""() to label %loop1 []
+
+loop2:
+ store volatile i32 888, ptr addrspace(1) %out, align 4
+ callbr void asm "", ""() to label %loop2 []
+}
+
define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_nest_ret:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; SI-NEXT: s_cbranch_execz .LBB3_5
+; SI-NEXT: s_cbranch_execz .LBB6_5
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
-; SI-NEXT: .LBB3_2: ; %outer_loop
+; SI-NEXT: .LBB6_2: ; %outer_loop
; SI-NEXT: ; =>This Loop Header: Depth=1
-; SI-NEXT: ; Child Loop BB3_3 Depth 2
+; SI-NEXT: ; Child Loop BB6_3 Depth 2
; SI-NEXT: s_mov_b64 s[2:3], 0
-; SI-NEXT: .LBB3_3: ; %inner_loop
-; SI-NEXT: ; Parent Loop BB3_2 Depth=1
+; SI-NEXT: .LBB6_3: ; %inner_loop
+; SI-NEXT: ; Parent Loop BB6_2 Depth=1
; SI-NEXT: ; => This Inner Loop Header: Depth=2
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
@@ -169,13 +306,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
-; SI-NEXT: s_cbranch_execnz .LBB3_3
+; SI-NEXT: s_cbranch_execnz .LBB6_3
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
-; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; SI-NEXT: ; in Loop: Header=BB6_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: s_branch .LBB3_2
-; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
+; SI-NEXT: s_branch .LBB6_2
+; SI-NEXT: .LBB6_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_nest_ret(
; IR-NEXT: entry:
@@ -212,4 +349,83 @@ return:
ret void
}
+define amdgpu_kernel void @infinite_loop_nest_ret_callbr(ptr addrspace(1) %out) {
+; SI-LABEL: infinite_loop_nest_ret_callbr:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-NEXT: ;;#ASMSTART
+; SI-NEXT: ;;#ASMEND
+; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
+; SI-NEXT: ; implicit-def: $sgpr4_sgpr5
+; SI-NEXT: s_branch .LBB7_3
+; SI-NEXT: .LBB7_2: ; %loop.exit.guard
+; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1
+; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
+; SI-NEXT: s_mov_b64 s[4:5], 0
+; SI-NEXT: s_cbranch_vccnz .LBB7_5
+; SI-NEXT: .LBB7_3: ; %outer_loop
+; SI-NEXT: ; =>This Inner Loop Header: Depth=1
+; SI-NEXT: ;;#ASMSTART
+; SI-NEXT: ;;#ASMEND
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: s_and_b64 vcc, exec, s[4:5]
+; SI-NEXT: s_mov_b64 s[4:5], -1
+; SI-NEXT: s_cbranch_vccz .LBB7_2
+; SI-NEXT: ; %bb.4: ; %TransitionBlock.target.outer_loop
+; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1
+; SI-NEXT: s_mov_b64 s[4:5], 0
+; SI-NEXT: s_branch .LBB7_2
+; SI-NEXT: .LBB7_5: ; Inline asm indirect target
+; SI-NEXT: ; %UnifiedReturnBlock
+; SI-NEXT: ; Label of block must be emitted
+; SI-NEXT: s_endpgm
+; IR-LABEL: @infinite_loop_nest_ret_callbr(
+; IR-NEXT: entry:
+; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
+; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1
+; IR-NEXT: [[COND1_32:%.*]] = zext i1 [[COND1]] to i32
+; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND1_32]])
+; IR-NEXT: to label [[OUTER_LOOP:%.*]] [label %UnifiedReturnBlock]
+; IR: outer_loop:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[INNER_LOOP:%.*]] []
+; IR: inner_loop:
+; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
+; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3
+; IR-NEXT: [[COND3_32:%.*]] = zext i1 [[COND3]] to i32
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
+; IR: TransitionBlock:
+; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND3_32]])
+; IR-NEXT: to label [[INNER_LOOP]] [label %outer_loop]
+; IR: UnifiedReturnBlock:
+; IR-NEXT: ret void
+;
+entry:
+ %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %cond1 = icmp ne i32 %tmp, 1 ; avoid the following BB being optimized away through domination
+ %cond1_32 = zext i1 %cond1 to i32
+ callbr void asm "", "r,!i"(i32 %cond1_32) to label %outer_loop [label %return]
+
+outer_loop:
+ ; %cond2 = icmp eq i32 %tmp, 2
+ ; br i1 %cond2, label %outer_loop, label %inner_loop
+ callbr void asm "", ""() to label %inner_loop []
+
+inner_loop: ; preds = %outer_loop, %inner_loop
+ store volatile i32 999, ptr addrspace(1) %out, align 4
+ %cond3 = icmp eq i32 %tmp, 3
+ %cond3_32 = zext i1 %cond3 to i32
+ callbr void asm "", "r,!i"(i32 %cond3_32) to label %inner_loop [label %outer_loop]
+
+return:
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
index 34de1e48bfb59..01bcdad3fc220 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll
@@ -3,15 +3,16 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA
define void @nested_inf_loop(i1 %0, i1 %1) {
-; OPT-LABEL: @nested_inf_loop(
-; OPT-NEXT: BB:
-; OPT-NEXT: br label [[BB1:%.*]]
-; OPT: BB1:
-; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]]
-; OPT-NEXT: br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]]
-; OPT: infloop:
-; OPT-NEXT: br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]]
-; OPT: DummyReturnBlock:
+; OPT-LABEL: define void @nested_inf_loop(
+; OPT-SAME: i1 [[TMP0:%.*]], i1 [[TMP1:%.*]]) {
+; OPT-NEXT: [[BB:.*:]]
+; OPT-NEXT: br label %[[BB1:.*]]
+; OPT: [[BB1]]:
+; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]]
+; OPT-NEXT: br i1 [[BRMERGE]], label %[[BB1]], label %[[INFLOOP:.*]]
+; OPT: [[INFLOOP]]:
+; OPT-NEXT: br i1 true, label %[[INFLOOP]], label %[[DUMMYRETURNBLOCK:.*]]
+; OPT: [[DUMMYRETURNBLOCK]]:
; OPT-NEXT: ret void
;
; ISA-LABEL: nested_inf_loop:
@@ -63,3 +64,84 @@ BB4:
BB3:
br label %BB1
}
+
+define void @nested_inf_loop_callbr(i32 %0, i32 %1) {
+; OPT-LABEL: define void @nested_inf_loop_callbr(
+; OPT-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; OPT-NEXT: [[BB:.*:]]
+; OPT-NEXT: callbr void asm "", ""()
+; OPT-NEXT: to label %[[BB1:.*]] []
+; OPT: [[BB1]]:
+; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP0]])
+; OPT-NEXT: to label %[[BB3:.*]] [label %BB2]
+; OPT: [[BB2:.*:]]
+; OPT-NEXT: callbr void asm "", ""()
+; OPT-NEXT: to label %[[BB4:.*]] []
+; OPT: [[BB4]]:
+; OPT-NEXT: br i1 true, label %[[TRANSITIONBLOCK:.*]], label %[[DUMMYRETURNBLOCK:.*]]
+; OPT: [[TRANSITIONBLOCK]]:
+; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP1]])
+; OPT-NEXT: to label %[[BB3]] [label %BB4]
+; OPT: [[BB3]]:
+; OPT-NEXT: callbr void asm "", ""()
+; OPT-NEXT: to label %[[BB1]] []
+; OPT: [[DUMMYRETURNBLOCK]]:
+; OPT-NEXT: ret void
+;
+; ISA-LABEL: nested_inf_loop_callbr:
+; ISA: ; %bb.0: ; %BB
+; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ISA-NEXT: ;;#ASMSTART
+; ISA-NEXT: ;;#ASMEND
+; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7
+; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
+; ISA-NEXT: .LBB1_1: ; %BB1
+; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
+; ISA-NEXT: ;;#ASMSTART
+; ISA-NEXT: ;;#ASMEND
+; ISA-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
+; ISA-NEXT: s_and_b64 s[8:9], s[4:5], exec
+; ISA-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
+; ISA-NEXT: .LBB1_2: ; %BB3
+; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
+; ISA-NEXT: ;;#ASMSTART
+; ISA-NEXT: ;;#ASMEND
+; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
+; ISA-NEXT: s_and_b64 s[8:9], s[6:7], exec
+; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
+; ISA-NEXT: s_branch .LBB1_1
+; ISA-NEXT: .LBB1_3: ; Inline asm indirect target
+; ISA-NEXT: ; %BB2
+; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
+; ISA-NEXT: ; Label of block must be emitted
+; ISA-NEXT: ;;#ASMSTART
+; ISA-NEXT: ;;#ASMEND
+; ISA-NEXT: s_mov_b64 s[6:7], -1
+; ISA-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
+; ISA-NEXT: s_cbranch_execz .LBB1_5
+; ISA-NEXT: ; %bb.4: ; %TransitionBlock.target.BB3
+; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
+; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
+; ISA-NEXT: .LBB1_5: ; %loop.exit.guard
+; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
+; ISA-NEXT: s_or_b64 exec, exec, s[8:9]
+; ISA-NEXT: s_and_b64 vcc, exec, s[6:7]
+; ISA-NEXT: s_mov_b64 s[6:7], 0
+; ISA-NEXT: s_cbranch_vccz .LBB1_2
+; ISA-NEXT: ; %bb.6: ; %DummyReturnBlock
+; ISA-NEXT: s_setpc_b64 s[30:31]
+BB:
+ callbr void asm "", ""() to label %BB1 []
+
+BB1:
+ callbr void asm "", "r,!i"(i32 %0) to label %BB3 [label %BB2]
+
+BB2:
+ callbr void asm "", ""() to label %BB4 []
+
+BB4:
+ callbr void asm "", "r,!i"(i32 %1) to label %BB3 [label %BB4]
+
+BB3:
+ callbr void asm "", ""() to label %BB1 []
+}
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
index 2dfb72a08cffc..ee7df73c849f7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s
declare void @llvm.trap()
@@ -70,8 +70,33 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
; CHECK-NEXT: s_mov_b64 s[2:3], -1
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: s_branch .LBB0_4
-
-
+; UNIFY-LABEL: @kernel(
+; UNIFY-NEXT: entry:
+; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256
+; UNIFY-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; UNIFY: if.then:
+; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
+; UNIFY-NEXT: br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]]
+; UNIFY: cond.false:
+; UNIFY-NEXT: call void @llvm.trap()
+; UNIFY-NEXT: unreachable
+; UNIFY: if.else:
+; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10
+; UNIFY-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]]
+; UNIFY: if.then3:
+; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0
+; UNIFY-NEXT: br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]]
+; UNIFY: cond.false.i8:
+; UNIFY-NEXT: call void @llvm.trap()
+; UNIFY-NEXT: unreachable
+; UNIFY: if.end6.sink.split:
+; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]]
+; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4
+; UNIFY-NEXT: br label [[IF_END6]]
+; UNIFY: if.end6:
+; UNIFY-NEXT: ret void
+;
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cmp = icmp eq i32 %n, 256
@@ -105,5 +130,129 @@ if.end6.sink.split:
if.end6:
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; UNIFY: {{.*}}
+
+define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
+; CHECK-LABEL: kernel_callbr:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dword s1, s[8:9], 0x10
+; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_cmpk_eq_i32 s1, 0x100
+; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ; %bb.1: ; %if.then
+; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: .LBB1_2: ; %if.end6.sink.split
+; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, s0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: .LBB1_3: ; Inline asm indirect target
+; CHECK-NEXT: ; %UnifiedReturnBlock
+; CHECK-NEXT: ; Label of block must be emitted
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: .LBB1_4: ; Inline asm indirect target
+; CHECK-NEXT: ; %if.else
+; CHECK-NEXT: ; Label of block must be emitted
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0
+; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ; %bb.5: ; %if.then3
+; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_branch .LBB1_2
+; CHECK-NEXT: .LBB1_6: ; Inline asm indirect target
+; CHECK-NEXT: ; %cond.false.i8
+; CHECK-NEXT: ; Label of block must be emitted
+; CHECK-NEXT: .LBB1_7: ; Inline asm indirect target
+; CHECK-NEXT: ; %cond.false
+; CHECK-NEXT: ; Label of block must be emitted
+; CHECK-NEXT: s_trap 2
+; CHECK-NEXT: ; divergent unreachable
+; CHECK-NEXT: s_branch .LBB1_3
+; UNIFY-LABEL: @kernel_callbr(
+; UNIFY-NEXT: entry:
+; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256
+; UNIFY-NEXT: [[CMP32:%.*]] = zext i1 [[CMP]] to i32
+; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP32]])
+; UNIFY-NEXT: to label [[IF_THEN:%.*]] [label %if.else]
+; UNIFY: if.then:
+; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
+; UNIFY-NEXT: [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32
+; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_32]])
+; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT:%.*]] [label %cond.false]
+; UNIFY: cond.false:
+; UNIFY-NEXT: call void @llvm.trap()
+; UNIFY-NEXT: unreachable
+; UNIFY: if.else:
+; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10
+; UNIFY-NEXT: [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32
+; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP2_32]])
+; UNIFY-NEXT: to label [[IF_THEN3:%.*]] [label %if.end6]
+; UNIFY: if.then3:
+; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0
+; UNIFY-NEXT: [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32
+; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]])
+; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8]
+; UNIFY: cond.false.i8:
+; UNIFY-NEXT: call void @llvm.trap()
+; UNIFY-NEXT: unreachable
+; UNIFY: if.end6.sink.split:
+; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]]
+; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4
+; UNIFY-NEXT: callbr void asm "", ""()
+; UNIFY-NEXT: to label [[IF_END6:%.*]] []
+; UNIFY: if.end6:
+; UNIFY-NEXT: ret void
+;
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %cmp = icmp eq i32 %n, 256
+ %cmp32 = zext i1 %cmp to i32
+ callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else]
+
+if.then:
+ %cmp1 = icmp eq i32 %a, 0
+ %cmp1_32 = zext i1 %cmp1 to i32
+ callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false]
+
+cond.false:
+ call void @llvm.trap()
+ unreachable
+
+if.else:
+ %cmp2 = icmp ult i32 %tid, 10
+ %cmp2_32 = zext i1 %cmp2 to i32
+ callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6]
+
+if.then3:
+ %cmp1.i7 = icmp eq i32 %a, 0
+ %cmp1.i7_32 = zext i1 %cmp1.i7 to i32
+ callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8]
+
+cond.false.i8:
+ call void @llvm.trap()
+ unreachable
+
+if.end6.sink.split:
+ %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid
+ store i32 %a, ptr addrspace(1) %x1, align 4
+ callbr void asm "", ""() to label %if.end6 []
+
+if.end6:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/update-phi.ll b/llvm/test/CodeGen/AMDGPU/update-phi.ll
index 50666bee325e8..684dc1a1f0092 100644
--- a/llvm/test/CodeGen/AMDGPU/update-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/update-phi.ll
@@ -37,3 +37,42 @@ n28: ; preds = %.loopexit, %n28
n31: ; preds =
ret void
}
+
+define amdgpu_ps void @_amdgpu_ps_main_callbr() local_unnamed_addr #3 {
+; IR-LABEL: @_amdgpu_ps_main_callbr(
+; IR-NEXT: .entry:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[DOTLOOPEXIT:%.*]] []
+; IR: .loopexit:
+; IR-NEXT: callbr void asm "", ""()
+; IR-NEXT: to label [[N28:%.*]] []
+; IR: n28:
+; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
+; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
+; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
+; IR-NEXT: [[N30_32:%.*]] = zext i1 [[N30]] to i32
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]]
+; IR: TransitionBlock:
+; IR-NEXT: callbr void asm "", "r,!i"(i32 [[N30_32]])
+; IR-NEXT: to label [[DOTLOOPEXIT]] [label %n28]
+; IR: n31:
+; IR-NEXT: ret void
+; IR: DummyReturnBlock:
+; IR-NEXT: ret void
+;
+.entry:
+ callbr void asm "", ""() to label %.loopexit []
+
+.loopexit: ; preds = %n28, %.entry
+ callbr void asm "", ""() to label %n28 []
+
+n28: ; preds = %.loopexit, %n28
+ %.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]
+ %n29 = fadd float %.01, 1.0
+ %n30 = fcmp ogt float %n29, 4.000000e+00
+ %n30.32 = zext i1 %n30 to i32
+ callbr void asm "", "r,!i"(i32 %n30.32) to label %.loopexit [label %n28]
+
+n31: ; preds =
+ ret void
+}
diff --git a/llvm/test/Transforms/StructurizeCFG/callbr.ll b/llvm/test/Transforms/StructurizeCFG/callbr.ll
new file mode 100644
index 0000000000000..7bc973397d5a1
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/callbr.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
+
+; Structurize as usual, but don't tear callbr and its destination blocks apart.
+
+define void @callbr_inline_asm(i32 %c, i1 %d, i1 %e) {
+; CHECK-LABEL: define void @callbr_inline_asm(
+; CHECK-SAME: i32 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]]) {
+; CHECK-NEXT: [[D_INV:%.*]] = xor i1 [[D]], true
+; CHECK-NEXT: br i1 [[D_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]]
+; CHECK: [[FLOW3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FLOW:.*]] ], [ true, [[TMP0:%.*]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[RET:.*]]
+; CHECK: [[CALLBR]]:
+; CHECK-NEXT: callbr void asm "", "!i"()
+; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
+; CHECK: [[INDIRECT]]:
+; CHECK-NEXT: br i1 [[E]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]]
+; CHECK: [[FALLTHROUGH1]]:
+; CHECK-NEXT: br label %[[FLOW2]]
+; CHECK: [[INDIRECT2:.*:]]
+; CHECK-NEXT: br i1 [[E]], label %[[INDIRECT1:.*]], label %[[FLOW1:.*]]
+; CHECK: [[INDIRECT1]]:
+; CHECK-NEXT: br label %[[FLOW1]]
+; CHECK: [[NOCALLBR]]:
+; CHECK-NEXT: br i1 [[E]], label %[[NOCALLBR1:.*]], label %[[FLOW]]
+; CHECK: [[NOCALLBR1]]:
+; CHECK-NEXT: br label %[[FLOW]]
+; CHECK: [[FLOW]]:
+; CHECK-NEXT: br label %[[FLOW3]]
+; CHECK: [[FLOW1]]:
+; CHECK-NEXT: br label %[[RET]]
+; CHECK: [[FLOW2]]:
+; CHECK-NEXT: br label %[[RET]]
+; CHECK: [[RET]]:
+; CHECK-NEXT: ret void
+;
+ br i1 %d, label %callbr, label %nocallbr
+callbr:
+ callbr void asm "", "!i"() to label %fallthrough [label %indirect]
+fallthrough:
+ br i1 %e, label %fallthrough1, label %ret
+fallthrough1:
+ br label %ret
+indirect:
+ br i1 %e, label %indirect1, label %ret
+indirect1:
+ br label %ret
+nocallbr:
+ br i1 %e, label %nocallbr1, label %ret
+nocallbr1:
+ br label %ret
+ret:
+ ret void
+}
>From ba9e6db89ecfdf4cc4960f3a33ddc00dcb0250c0 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 1 Apr 2025 08:03:16 -0500
Subject: [PATCH 6/6] [IR] Add CallBr intrinsics support
This commit adds support for using intrinsics with callbr.
A typical use looks like this example:
```llvm
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
unreachable
cont:
...
```
---
llvm/docs/LangRef.rst | 28 ++-
.../llvm/CodeGen/GlobalISel/IRTranslator.h | 6 +
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 97 ++++++--
.../SelectionDAG/SelectionDAGBuilder.cpp | 227 ++++++++++++------
.../SelectionDAG/SelectionDAGBuilder.h | 15 +-
llvm/lib/IR/Verifier.cpp | 33 ++-
llvm/test/Assembler/callbr.ll | 20 ++
llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll | 101 ++++++++
llvm/test/Verifier/callbr.ll | 56 +++++
polly/test/ScopDetect/callbr.ll | 9 +-
10 files changed, 476 insertions(+), 116 deletions(-)
create mode 100644 llvm/test/Assembler/callbr.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2759e18301d58..e6080e4741a02 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -9624,8 +9624,12 @@ The '``callbr``' instruction causes control to transfer to a specified
function, with the possibility of control flow transfer to either the
'``fallthrough``' label or one of the '``indirect``' labels.
-This instruction should only be used to implement the "goto" feature of gcc
-style inline assembly. Any other usage is an error in the IR verifier.
+This instruction can currently only be used
+
+#. to implement the "goto" feature of gcc style inline assembly or
+#. to call selected intrinsics.
+
+Any other usage is an error in the IR verifier.
Note that in order to support outputs along indirect edges, LLVM may need to
split critical edges, which may require synthesizing a replacement block for
@@ -9674,7 +9678,8 @@ This instruction requires several arguments:
indicates the function accepts a variable number of arguments, the
extra arguments can be specified.
#. '``fallthrough label``': the label reached when the inline assembly's
- execution exits the bottom.
+ execution exits the bottom. In case of an intrinsic call, the semantics
+ depend on the semantics of the intrinsic.
#. '``indirect labels``': the labels reached when a callee transfers control
to a location other than the '``fallthrough label``'. Label constraints
refer to these destinations.
@@ -9692,9 +9697,12 @@ flow goes after the call.
The output values of a '``callbr``' instruction are available both in the
the '``fallthrough``' block, and any '``indirect``' blocks(s).
-The only use of this today is to implement the "goto" feature of gcc inline
-assembly where additional labels can be provided as locations for the inline
-assembly to jump to.
+The only uses of this today are:
+
+#. implementing the "goto" feature of gcc inline assembly where additional
+ labels can be provided as locations for the inline assembly to jump to.
+#. supporting selected intrinsics which manipulate control flow and should
+ be chained to specific terminators, such as '``unreachable``'.
Example:
""""""""
@@ -9709,6 +9717,14 @@ Example:
<result> = callbr i32 asm "", "=r,r,!i"(i32 %x)
to label %fallthrough [label %indirect]
+ ; intrinsic which should be followed by unreachable (the order of the
+ ; blocks after the callbr instruction doesn't matter)
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+ cont:
+ ...
+ kill:
+ unreachable
+
.. _i_resume:
'``resume``' Instruction
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 6fd05c8fddd5f..941750510f1e1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -297,6 +297,10 @@ class IRTranslator : public MachineFunctionPass {
/// \pre \p U is a call instruction.
bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateTargetIntrinsic(
+ const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -313,6 +317,8 @@ class IRTranslator : public MachineFunctionPass {
bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
+ bool translateCallBrIntrinsic(const CallBrInst &I,
+ MachineIRBuilder &MIRBuilder);
bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index d7280eaba2440..8b89969f4b8f4 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2789,20 +2789,35 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
+ TargetLowering::IntrinsicInfo Info;
+ // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
+ bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+ return translateTargetIntrinsic(CI, ID, MIRBuilder,
+ IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
+/// Translate a call or callbr to a target intrinsic.
+/// Depending on whether TLI->getTgtMemIntrinsic() is true, TgtMemIntrinsicInfo
+/// is a pointer to the correspondingly populated IntrinsicInfo object.
+/// Otherwise, this pointer is null.
+bool IRTranslator::translateTargetIntrinsic(
+ const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
ArrayRef<Register> ResultRegs;
- if (!CI.getType()->isVoidTy())
- ResultRegs = getOrCreateVRegs(CI);
+ if (!CB.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CB);
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
- if (isa<FPMathOperator>(CI))
- MIB->copyIRFlags(CI);
+ if (isa<FPMathOperator>(CB))
+ MIB->copyIRFlags(CB);
- for (const auto &Arg : enumerate(CI.args())) {
+ for (const auto &Arg : enumerate(CB.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
- if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
// imm arguments are more convenient than cimm (and realistically
// probably sufficient), so use them.
@@ -2831,29 +2846,32 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
}
// Add a MachineMemOperand if it is a target mem intrinsic.
- TargetLowering::IntrinsicInfo Info;
- // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
- if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- Align Alignment = Info.align.value_or(
- DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
- LLT MemTy = Info.memVT.isSimple()
- ? getLLTForMVT(Info.memVT.getSimpleVT())
- : LLT::scalar(Info.memVT.getStoreSizeInBits());
+ if (TgtMemIntrinsicInfo) {
+ const Function *F = CB.getCalledFunction();
+
+ Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign(
+ TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext())));
+ LLT MemTy =
+ TgtMemIntrinsicInfo->memVT.isSimple()
+ ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT())
+ : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits());
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
// didn't yield anything useful.
MachinePointerInfo MPI;
- if (Info.ptrVal)
- MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
- else if (Info.fallbackAddressSpace)
- MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
+ if (TgtMemIntrinsicInfo->ptrVal)
+ MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal,
+ TgtMemIntrinsicInfo->offset);
+ else if (TgtMemIntrinsicInfo->fallbackAddressSpace)
+ MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace);
MIB.addMemOperand(MF->getMachineMemOperand(
- MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata(),
- /*Ranges=*/nullptr, Info.ssid, Info.order, Info.failureOrder));
+ MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata(),
+ /*Ranges=*/nullptr, TgtMemIntrinsicInfo->ssid,
+ TgtMemIntrinsicInfo->order, TgtMemIntrinsicInfo->failureOrder));
}
- if (CI.isConvergent()) {
- if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ if (CB.isConvergent()) {
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
Register TokenReg = getOrCreateVReg(*Token);
MIB.addUse(TokenReg, RegState::Implicit);
@@ -3006,10 +3024,41 @@ bool IRTranslator::translateInvoke(const User &U,
return true;
}
+/// The intrinsics currently supported by callbr are implicit control flow
+/// intrinsics such as amdgcn.kill.
bool IRTranslator::translateCallBr(const User &U,
MachineIRBuilder &MIRBuilder) {
- // FIXME: Implement this.
- return false;
+ if (containsBF16Type(U))
+ return false; // see translateCall
+
+ const CallBrInst &I = cast<CallBrInst>(U);
+ MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
+
+ // FIXME: inline asm not yet supported for callbr in GlobalISel. As soon as
+ // we add support, we need to handle the indirect asm targets, see
+ // SelectionDAGBuilder::visitCallBr().
+ if (I.isInlineAsm())
+ return false;
+ if (I.getIntrinsicID() == Intrinsic::not_intrinsic)
+ return false;
+ if (!translateTargetIntrinsic(I, I.getIntrinsicID(), MIRBuilder))
+ return false;
+
+ // Retrieve successors.
+ SmallPtrSet<BasicBlock *, 8> Dests;
+ Dests.insert(I.getDefaultDest());
+ MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+ // TODO: For most of the cases where there is an intrinsic callbr, we
+ // have exactly one indirect target, which will be unreachable. As soon as
+ // this changes, we might need to enhance
+ // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+ // intrinsic indirect branches.
+ CallBrMBB->normalizeSuccProbs();
+
+ return true;
}
bool IRTranslator::translateLandingPad(const User &U,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 74c14ede24755..854ac52ae1095 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3471,16 +3471,40 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
+/// The intrinsics currently supported by callbr are implicit control flow
+/// intrinsics such as amdgcn.kill.
+/// - they should be called (no "dontcall-" attributes)
+/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
+/// - they do not need custom argument handling (no
+/// TLI.CollectTargetIntrinsicOperands())
+void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ SmallVector<SDValue, 8> Ops =
+ getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
+
+ // Create the node.
+ SDValue Result = getTargetNonMemIntrinsicNode(I, HasChain, Ops, VTs);
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
+
+ setValue(&I, Result);
+}
+
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
- // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
- // have to do anything here to lower funclet bundles.
- failForInvalidBundles(I, "callbrs",
- {LLVMContext::OB_deopt, LLVMContext::OB_funclet});
-
- assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
- visitInlineAsm(I);
+ if (I.isInlineAsm()) {
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ failForInvalidBundles(I, "callbrs",
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet});
+ visitInlineAsm(I);
+ } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+ visitCallBrIntrinsic(I);
+ } else {
+ report_fatal_error("only know how to handle inlineasm/intrinsic callbr");
+ }
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
@@ -3490,19 +3514,25 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
- MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
- Target->setIsInlineAsmBrIndirectTarget();
- // If we introduce a type of asm goto statement that is permitted to use an
- // indirect call instruction to jump to its labels, then we should add a
- // call to Target->setMachineBlockAddressTaken() here, to mark the target
- // block as requiring a BTI.
-
- Target->setLabelMustBeEmitted();
- // Don't add duplicate machine successors.
- if (Dests.insert(Dest).second)
- addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ // TODO: For most of the cases where there is an intrinsic callbr, we
+ // have exactly one indirect target, which will be unreachable. As soon as
+ // this changes, we might need to enhance
+ // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+ // intrinsic indirect branches.
+ if (I.isInlineAsm()) {
+ for (BasicBlock *Dest : I.getIndirectDests()) {
+ MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+ Target->setIsInlineAsmBrIndirectTarget();
+ // If we introduce a type of asm goto statement that is permitted to use
+ // an indirect call instruction to jump to its labels, then we should add
+ // a call to Target->setMachineBlockAddressTaken() here, to mark the
+ // target block as requiring a BTI.
+
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
}
CallBrMBB->normalizeSuccProbs();
@@ -5304,18 +5334,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
DAG.setRoot(OutChain);
}
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
- unsigned Intrinsic) {
- // Ignore the callsite's attributes. A specific call site may be marked with
- // readnone, but the lowering code will expect the chain based on the
- // definition.
+/// Check if this intrinsic call depends on the chain (1st return value)
+/// and if it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad =
HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
+ return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
@@ -5327,17 +5365,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
- // Info is set by getTgtMemIntrinsic
- TargetLowering::IntrinsicInfo Info;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
- DAG.getMachineFunction(),
- Intrinsic);
-
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
- Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+ if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+ TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+ Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
@@ -5360,13 +5391,94 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
}
+ if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ assert(Ops.back().getValueType() != MVT::Glue &&
+ "Did not expected another glue node here.");
+ ConvControlToken =
+ DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+
+ return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+ bool HasChain) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(ValueVTs);
+ return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch
+/// memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+ const CallBase &I, bool HasChain, SmallVector<SDValue, 8> &Ops,
+ SDVTList &VTs) {
+ SDValue Result;
+
+ if (!HasChain) {
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else if (!I.getType()->isVoidTy()) {
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+ } else {
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ }
+
+ return Result;
+}
+
+/// Set root, convert return type if necessary and check alignment.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+ bool HasChain,
+ bool OnlyLoad,
+ SDValue Result) {
+ if (HasChain) {
+ SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+ if (OnlyLoad)
+ PendingLoads.push_back(Chain);
+ else
+ DAG.setRoot(Chain);
+ }
+
+ if (I.getType()->isVoidTy())
+ return Result;
+
+ if (!isa<VectorType>(I.getType()))
+ Result = lowerRangeToAssertZExt(DAG, I, Result);
+
+ MaybeAlign Alignment = I.getRetAlign();
+
+ // Insert `assertalign` node if there's an alignment.
+ if (InsertAssertAlign && Alignment) {
+ Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+ }
+
+ return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+ // Info is set by getTgtMemIntrinsic
+ TargetLowering::IntrinsicInfo Info;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ bool IsTgtMemIntrinsic =
+ TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
+
+ SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+ I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+ SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
@@ -5377,19 +5489,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
- if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
- auto *Token = Bundle->Inputs[0].get();
- SDValue ConvControlToken = getValue(Token);
- assert(Ops.back().getValueType() != MVT::Glue &&
- "Did not expected another glue node here.");
- ConvControlToken =
- DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
- Ops.push_back(ConvControlToken);
- }
-
// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
- if (IsTgtIntrinsic) {
+ if (IsTgtMemIntrinsic) {
// This is target intrinsic that touches memory
//
// TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5409,34 +5511,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Info.ssid, Info.order, Info.failureOrder);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, MemVT, MMO);
- } else if (!HasChain) {
- Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
- } else if (!I.getType()->isVoidTy()) {
- Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
- Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+ Result = getTargetNonMemIntrinsicNode(I, HasChain, Ops, VTs);
}
- if (HasChain) {
- SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
- if (OnlyLoad)
- PendingLoads.push_back(Chain);
- else
- DAG.setRoot(Chain);
- }
-
- if (!I.getType()->isVoidTy()) {
- if (!isa<VectorType>(I.getType()))
- Result = lowerRangeToAssertZExt(DAG, I, Result);
-
- MaybeAlign Alignment = I.getRetAlign();
-
- // Insert `assertalign` node if there's an alignment.
- if (InsertAssertAlign && Alignment) {
- Result =
- DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
- }
- }
+ Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
setValue(&I, Result);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 1c278076a219d..5052536b76fba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -535,10 +535,12 @@ class SelectionDAGBuilder {
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
- void visitCallBr(const CallBrInst &I);
void visitCallBrLandingPad(const CallInst &I);
void visitResume(const ResumeInst &I);
+ void visitCallBr(const CallBrInst &I);
+ void visitCallBrIntrinsic(const CallBrInst &I);
+
void visitUnary(const User &I, unsigned Opcode);
void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
@@ -708,6 +710,17 @@ class SelectionDAGBuilder {
MCSymbol *&BeginLabel);
SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+ std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
+ SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+ const CallBase &I, bool HasChain, bool OnlyLoad,
+ TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+ SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+ SDValue getTargetNonMemIntrinsicNode(const CallBase &I, bool HasChain,
+ SmallVector<SDValue, 8> &Ops,
+ SDVTList &VTs);
+ SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+ bool OnlyLoad, SDValue Result);
};
/// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 8c8ed3c5e47ba..86fa8e6c7b373 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3317,11 +3317,34 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
}
void Verifier::visitCallBrInst(CallBrInst &CBI) {
- Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
- const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
- Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+ if (!CBI.isInlineAsm()) {
+ Check(CBI.getCalledFunction(),
+ "Callbr: indirect function / invalid signature");
+ Check(!CBI.hasOperandBundles(),
+ "Callbr for intrinsics currently doesn't support operand bundles");
+
+ switch (CBI.getIntrinsicID()) {
+ case Intrinsic::amdgcn_kill: {
+ Check(CBI.getNumIndirectDests() == 1,
+ "Callbr amdgcn_kill only supports one indirect dest");
+ bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
+ CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
+ Check(Unreachable || (Call && Call->getIntrinsicID() ==
+ Intrinsic::amdgcn_unreachable),
+ "Callbr amdgcn_kill indirect dest needs to be unreachable");
+ break;
+ }
+ default:
+ CheckFailed(
+ "Callbr currently only supports asm-goto and selected intrinsics");
+ }
+ visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
+ } else {
+ const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
+ Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
- verifyInlineAsmCall(CBI);
+ verifyInlineAsmCall(CBI);
+ }
visitTerminator(CBI);
}
@@ -5330,7 +5353,7 @@ void Verifier::visitInstruction(Instruction &I) {
(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
IsAttachedCallOperand(F, CBI, i)),
"Cannot take the address of an intrinsic!", &I);
- Check(!F->isIntrinsic() || isa<CallInst>(I) ||
+ Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::seh_try_begin ||
F->getIntrinsicID() == Intrinsic::seh_try_end ||
diff --git a/llvm/test/Assembler/callbr.ll b/llvm/test/Assembler/callbr.ll
new file mode 100644
index 0000000000000..37fd777e1a395
--- /dev/null
+++ b/llvm/test/Assembler/callbr.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare void @llvm.amdgcn.kill(i1)
+
+define void @test_kill(i1 %c) {
+; CHECK-LABEL: define void @test_kill(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT: callbr void @llvm.amdgcn.kill(i1 [[C]])
+; CHECK-NEXT: to label %[[CONT:.*]] [label %kill]
+; CHECK: [[KILL:.*:]]
+; CHECK-NEXT: unreachable
+; CHECK: [[CONT]]:
+; CHECK-NEXT: ret void
+;
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+ unreachable
+cont:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
new file mode 100644
index 0000000000000..ff1e23836fed2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefix=GISEL %s
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+; CHECK-LABEL: test_kill:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v0, v[0:1]
+; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; CHECK-NEXT: s_mov_b64 s[4:5], exec
+; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
+; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_store_dword v[2:3], v0
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: s_mov_b64 exec, 0
+; CHECK-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_kill:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_store_dword v[2:3], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB0_2:
+; GISEL-NEXT: s_mov_b64 exec, 0
+; GISEL-NEXT: s_endpgm
+ %a = load i32, ptr %src, align 4
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+ unreachable
+cont:
+ store i32 %a, ptr %dst, align 4
+ ret void
+}
+
+define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
+; CHECK-LABEL: test_kill_block_order:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v0, v[0:1]
+; CHECK-NEXT: v_and_b32_e32 v1, 1, v4
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; CHECK-NEXT: s_mov_b64 s[4:5], exec
+; CHECK-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; CHECK-NEXT: s_cbranch_scc0 .LBB1_2
+; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: s_and_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_store_dword v[2:3], v0
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: s_mov_b64 exec, 0
+; CHECK-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_kill_block_order:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_store_dword v[2:3], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB1_2:
+; GISEL-NEXT: s_mov_b64 exec, 0
+; GISEL-NEXT: s_endpgm
+ %a = load i32, ptr %src, align 4
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+cont:
+ store i32 %a, ptr %dst, align 4
+ ret void
+kill:
+ unreachable
+}
diff --git a/llvm/test/Verifier/callbr.ll b/llvm/test/Verifier/callbr.ll
index 9b819c5fed48b..b32c99a412d15 100644
--- a/llvm/test/Verifier/callbr.ll
+++ b/llvm/test/Verifier/callbr.ll
@@ -120,3 +120,59 @@ landingpad:
%out = call i32 @llvm.callbr.landingpad.i32(i32 %0)
ret i32 %out
}
+
+declare void @llvm.amdgcn.kill(i1)
+
+; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest
+define void @test_callbr_intrinsic_indirect0(i1 %c) {
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont []
+kill:
+ unreachable
+cont:
+ ret void
+}
+
+; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest
+define void @test_callbr_intrinsic_indirect2(i1 %c) {
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill1, label %kill2]
+kill1:
+ unreachable
+kill2:
+ unreachable
+cont:
+ ret void
+}
+
+; CHECK-NEXT: Callbr amdgcn_kill indirect dest needs to be unreachable
+define void @test_callbr_intrinsic_no_unreachable(i1 %c) {
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+ ret void
+cont:
+ ret void
+}
+
+; CHECK-NEXT: Callbr currently only supports asm-goto and selected intrinsics
+declare i32 @llvm.amdgcn.workitem.id.x()
+define void @test_callbr_intrinsic_unsupported() {
+ callbr i32 @llvm.amdgcn.workitem.id.x() to label %cont []
+cont:
+ ret void
+}
+
+; CHECK-NEXT: Callbr: indirect function / invalid signature
+define void @test_callbr_intrinsic_wrong_signature(ptr %ptr) {
+ %func = load ptr, ptr %ptr, align 8
+ callbr void %func() to label %cont []
+cont:
+ ret void
+}
+
+; CHECK-NEXT: Callbr for intrinsics currently doesn't support operand bundles
+define void @test_callbr_intrinsic_no_operand_bundles(i1 %c) {
+ callbr void @llvm.amdgcn.kill(i1 %c) [ "foo"(i1 %c) ] to label %cont [label %kill]
+kill:
+ unreachable
+cont:
+ ret void
+}
diff --git a/polly/test/ScopDetect/callbr.ll b/polly/test/ScopDetect/callbr.ll
index 4182974693678..75f676afd79c4 100644
--- a/polly/test/ScopDetect/callbr.ll
+++ b/polly/test/ScopDetect/callbr.ll
@@ -1,10 +1,7 @@
-; RUN: opt %loadNPMPolly '-passes=print<polly-detect>' -polly-detect-track-failures -disable-output -pass-remarks-missed=polly-detect < %s 2>&1 | FileCheck %s --check-prefix=REMARK
-; RUN: opt %loadNPMPolly '-passes=print<polly-detect>' -polly-detect-track-failures -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STAT
-; REQUIRES: asserts
+; RUN: opt %loadNPMPolly '-passes=print<polly-detect>' -disable-output < %s 2>&1 | FileCheck %s
-; REMARK: Branch from indirect terminator.
-
-; STAT: 1 polly-detect - Number of rejected regions: Branch from indirect terminator
+; CHECK-LABEL: func
+; CHECK-NOT: Valid
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
More information about the llvm-commits
mailing list