[llvm] 53b68e6 - Resubmit: [JumpThreading] Thread jumps through two basic blocks
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 12:34:02 PST 2020
Author: Kazu Hirata
Date: 2020-01-16T12:33:37-08:00
New Revision: 53b68e676faf208b4a8f817e9bd4ddd522cc6006
URL: https://github.com/llvm/llvm-project/commit/53b68e676faf208b4a8f817e9bd4ddd522cc6006
DIFF: https://github.com/llvm/llvm-project/commit/53b68e676faf208b4a8f817e9bd4ddd522cc6006.diff
LOG: Resubmit: [JumpThreading] Thread jumps through two basic blocks
This reverts commit 2d258ed931cdf47a7d1dcf08ad963b5452a8670f. This
revision fixes the Windows build and adds a testcase for it, namely
thread-two-bbs3.ll. My original patch improperly copied EH pads on
Windows. This patch disregards jump threading opportunities having to
do with EH pads.
[JumpThreading] Thread jumps through two basic blocks
Summary:
This patch teaches JumpThreading.cpp to thread through two basic
blocks like:
bb3:
%var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %bb4, label ...
bb4:
%cmp = icmp eq i32* %var, null
br i1 %cmp, label bb5, label bb6
by duplicating basic blocks like bb3 above. Once we duplicate bb3 as
bb3.dup and redirect edge bb2->bb3 to bb2->bb3.dup, we have:
bb3:
%var = phi i32* [ @a, %bb2 ]
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %bb4, label ...
bb3.dup:
%var = phi i32* [ null, %bb1 ]
%tobool = icmp eq i32 %cond, 0
br i1 %tobool, label %bb4, label ...
bb4:
%cmp = icmp eq i32* %var, null
br i1 %cmp, label bb5, label bb6
Then the existing code in JumpThreading.cpp can thread edge
bb3.dup->bb4 through bb4 and eventually create bb3.dup->bb5.
Reviewers: wmi
Subscribers: hiraditya, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70247
Added:
llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll
llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll
llvm/test/Transforms/JumpThreading/thread-two-bbs3.ll
Modified:
llvm/include/llvm/Transforms/Scalar/JumpThreading.h
llvm/lib/Transforms/Scalar/JumpThreading.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index a8beb8ca6e05..285d3489345c 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -139,6 +139,11 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
RecursionSet, CxtI);
}
+ Constant *EvaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
+ Value *cond);
+ bool MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond);
+ void ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB,
+ BasicBlock *BB, BasicBlock *SuccBB);
bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
jumpthreading::ConstantPreference Preference,
Instruction *CxtI = nullptr);
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 98c2fcb3dae0..37413702ce8f 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1548,6 +1548,52 @@ FindMostPopularDest(BasicBlock *BB,
return MostPopularDest;
}
+// Try to evaluate the value of V when the control flows from PredPredBB to
+// BB->getSinglePredecessor() and then on to BB.
+Constant *JumpThreadingPass::EvaluateOnPredecessorEdge(BasicBlock *BB,
+ BasicBlock *PredPredBB,
+ Value *V) {
+ BasicBlock *PredBB = BB->getSinglePredecessor();
+ assert(PredBB && "Expected a single predecessor");
+
+ if (Constant *Cst = dyn_cast<Constant>(V)) {
+ return Cst;
+ }
+
+ // Consult LVI if V is not an instruction in BB or PredBB.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || (I->getParent() != BB && I->getParent() != PredBB)) {
+ if (DTU->hasPendingDomTreeUpdates())
+ LVI->disableDT();
+ else
+ LVI->enableDT();
+ return LVI->getConstantOnEdge(V, PredPredBB, PredBB, nullptr);
+ }
+
+ // Look into a PHI argument.
+ if (PHINode *PHI = dyn_cast<PHINode>(V)) {
+ if (PHI->getParent() == PredBB)
+ return dyn_cast<Constant>(PHI->getIncomingValueForBlock(PredPredBB));
+ return nullptr;
+ }
+
+ // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
+ if (CondCmp->getParent() == BB) {
+ Constant *Op0 =
+ EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0));
+ Constant *Op1 =
+ EvaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1));
+ if (Op0 && Op1) {
+ return ConstantExpr::getCompare(CondCmp->getPredicate(), Op0, Op1);
+ }
+ }
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
ConstantPreference Preference,
Instruction *CxtI) {
@@ -1557,8 +1603,12 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
return false;
PredValueInfoTy PredValues;
- if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference, CxtI))
- return false;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues, Preference,
+ CxtI)) {
+ // We don't have known values in predecessors. See if we can thread through
+ // BB and its sole predecessor.
+ return MaybeThreadThroughTwoBasicBlocks(BB, Cond);
+ }
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -2015,6 +2065,186 @@ JumpThreadingPass::CloneInstructions(BasicBlock::iterator BI,
return ValueMapping;
}
+/// Attempt to thread through two successive basic blocks.
+bool JumpThreadingPass::MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB,
+ Value *Cond) {
+ // Consider:
+ //
+ // PredBB:
+ // %var = phi i32* [ null, %bb1 ], [ @a, %bb2 ]
+ // %tobool = icmp eq i32 %cond, 0
+ // br i1 %tobool, label %BB, label ...
+ //
+ // BB:
+ // %cmp = icmp eq i32* %var, null
+ // br i1 %cmp, label ..., label ...
+ //
+ // We don't know the value of %var at BB even if we know which incoming edge
+ // we take to BB. However, once we duplicate PredBB for each of its incoming
+ // edges (say, PredBB1 and PredBB2), we know the value of %var in each copy of
+ // PredBB. Then we can thread edges PredBB1->BB and PredBB2->BB through BB.
+
+ // Require that BB end with a Branch for simplicity.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!CondBr)
+ return false;
+
+ // BB must have exactly one predecessor.
+ BasicBlock *PredBB = BB->getSinglePredecessor();
+ if (!PredBB)
+ return false;
+
+ // Require that PredBB end with a Branch. If PredBB ends with an
+ // unconditional branch, we should be merging PredBB and BB instead. For
+ // simplicity, we don't deal with a switch.
+ BranchInst *PredBBBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
+ if (!PredBBBranch)
+ return false;
+
+ // If PredBB has exactly one incoming edge, we don't gain anything by copying
+ // PredBB.
+ if (PredBB->getSinglePredecessor())
+ return false;
+
+ // Don't thread across a loop header.
+ if (LoopHeaders.count(PredBB))
+ return false;
+
+ // Avoid complication with duplicating EH pads.
+ if (PredBB->isEHPad())
+ return false;
+
+ // Find a predecessor that we can thread. For simplicity, we only consider a
+ // successor edge out of BB to which we thread exactly one incoming edge into
+ // PredBB.
+ unsigned ZeroCount = 0;
+ unsigned OneCount = 0;
+ BasicBlock *ZeroPred = nullptr;
+ BasicBlock *OnePred = nullptr;
+ for (BasicBlock *P : predecessors(PredBB)) {
+ if (Constant *Cst = EvaluateOnPredecessorEdge(BB, P, Cond)) {
+ if (Cst->isZeroValue()) {
+ ZeroCount++;
+ ZeroPred = P;
+ } else {
+ OneCount++;
+ OnePred = P;
+ }
+ }
+ }
+
+ // Disregard complicated cases where we have to thread multiple edges.
+ BasicBlock *PredPredBB;
+ if (ZeroCount == 1) {
+ PredPredBB = ZeroPred;
+ } else if (OneCount == 1) {
+ PredPredBB = OnePred;
+ } else {
+ return false;
+ }
+
+ BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred);
+
+ // If threading to the same block as we come from, we would infinite loop.
+ if (SuccBB == BB) {
+ LLVM_DEBUG(dbgs() << " Not threading across BB '" << BB->getName()
+ << "' - would thread to self!\n");
+ return false;
+ }
+
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB) || LoopHeaders.count(SuccBB)) {
+ LLVM_DEBUG({
+ bool BBIsHeader = LoopHeaders.count(BB);
+ bool SuccIsHeader = LoopHeaders.count(SuccBB);
+ dbgs() << " Not threading across "
+ << (BBIsHeader ? "loop header BB '" : "block BB '")
+ << BB->getName() << "' to dest "
+ << (SuccIsHeader ? "loop header BB '" : "block BB '")
+ << SuccBB->getName()
+ << "' - it might create an irreducible loop!\n";
+ });
+ return false;
+ }
+
+ // Check the cost of duplicating BB and PredBB.
+ unsigned JumpThreadCost =
+ getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ JumpThreadCost += getJumpThreadDuplicationCost(
+ PredBB, PredBB->getTerminator(), BBDupThreshold);
+ if (JumpThreadCost > BBDupThreshold) {
+ LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
+ << "' - Cost is too high: " << JumpThreadCost << "\n");
+ return false;
+ }
+
+ // Now we are ready to duplicate PredBB.
+ ThreadThroughTwoBasicBlocks(PredPredBB, PredBB, BB, SuccBB);
+ return true;
+}
+
+void JumpThreadingPass::ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
+ BasicBlock *PredBB,
+ BasicBlock *BB,
+ BasicBlock *SuccBB) {
+ LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
+ << BB->getName() << "'\n");
+
+ BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
+ BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
+
+ BasicBlock *NewBB =
+ BasicBlock::Create(PredBB->getContext(), PredBB->getName() + ".thread",
+ PredBB->getParent(), PredBB);
+ NewBB->moveAfter(PredBB);
+
+ // Set the block frequency of NewBB.
+ if (HasProfileData) {
+ auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
+ BPI->getEdgeProbability(PredPredBB, PredBB);
+ BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
+ }
+
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in PredBB, evaluate them
+ // to account for entry from PredPredBB.
+ DenseMap<Instruction *, Value *> ValueMapping =
+ CloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
+
+ // Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
+ // This eliminates predecessors from PredPredBB, which requires us to simplify
+ // any PHI nodes in PredBB.
+ Instruction *PredPredTerm = PredPredBB->getTerminator();
+ for (unsigned i = 0, e = PredPredTerm->getNumSuccessors(); i != e; ++i)
+ if (PredPredTerm->getSuccessor(i) == PredBB) {
+ PredBB->removePredecessor(PredPredBB, true);
+ PredPredTerm->setSuccessor(i, NewBB);
+ }
+
+ AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(0), PredBB, NewBB,
+ ValueMapping);
+ AddPHINodeEntriesForMappedBlock(PredBBBranch->getSuccessor(1), PredBB, NewBB,
+ ValueMapping);
+
+ DTU->applyUpdatesPermissive(
+ {{DominatorTree::Insert, NewBB, CondBr->getSuccessor(0)},
+ {DominatorTree::Insert, NewBB, CondBr->getSuccessor(1)},
+ {DominatorTree::Insert, PredPredBB, NewBB},
+ {DominatorTree::Delete, PredPredBB, PredBB}});
+
+ UpdateSSA(PredBB, NewBB, ValueMapping);
+
+ // Clean up things like PHI nodes with single operands, dead instructions,
+ // etc.
+ SimplifyInstructionsInBlock(NewBB, TLI);
+ SimplifyInstructionsInBlock(PredBB, TLI);
+
+ SmallVector<BasicBlock *, 1> PredsToFactor;
+ PredsToFactor.push_back(NewBB);
+ ThreadEdge(BB, PredsToFactor, SuccBB);
+}
+
/// TryThreadEdge - Thread an edge if it's safe and profitable to do so.
bool JumpThreadingPass::TryThreadEdge(
BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll
new file mode 100644
index 000000000000..1b5f5cb14aee
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/thread-two-bbs1.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at a = global i32 0, align 4
+
+define void @foo(i32 %cond1, i32 %cond2) {
+; CHECK-LABEL: @foo
+; CHECK-LABEL: entry
+entry:
+ %tobool = icmp eq i32 %cond1, 0
+ br i1 %tobool, label %bb.cond2, label %bb.f1
+
+bb.f1:
+ call void @f1()
+ br label %bb.cond2
+; Verify that we branch on cond2 without checking ptr.
+; CHECK: call void @f1()
+; CHECK-NEXT: icmp eq i32 %cond2, 0
+; CHECK-NEXT: label %bb.f4, label %bb.f2
+
+bb.cond2:
+ %ptr = phi i32* [ null, %bb.f1 ], [ @a, %entry ]
+ %tobool1 = icmp eq i32 %cond2, 0
+ br i1 %tobool1, label %bb.file, label %bb.f2
+; Verify that we branch on cond2 without checking ptr.
+; CHECK: icmp eq i32 %cond2, 0
+; CHECK-NEXT: label %bb.f3, label %bb.f2
+
+bb.f2:
+ call void @f2()
+ br label %exit
+
+; Verify that we eliminate this basic block.
+; CHECK-NOT: bb.file:
+bb.file:
+ %cmp = icmp eq i32* %ptr, null
+ br i1 %cmp, label %bb.f4, label %bb.f3
+
+bb.f3:
+ call void @f3()
+ br label %exit
+
+bb.f4:
+ call void @f4()
+ br label %exit
+
+exit:
+ ret void
+}
+
+declare void @f1()
+
+declare void @f2()
+
+declare void @f3()
+
+declare void @f4()
diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll
new file mode 100644
index 000000000000..ebb7ce013eb0
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/thread-two-bbs2.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %cond1, i32 %cond2) {
+; CHECK-LABEL: @foo
+; CHECK-LABEL: entry
+entry:
+ %tobool = icmp ne i32 %cond1, 0
+ br i1 %tobool, label %bb.f1, label %bb.f2
+
+bb.f1:
+ call void @f1()
+ br label %bb.cond2
+; Verify that we branch on cond2 without checking tobool again.
+; CHECK: call void @f1()
+; CHECK-NEXT: icmp eq i32 %cond2, 0
+; CHECK-NEXT: label %exit, label %bb.f3
+
+bb.f2:
+ call void @f2()
+ br label %bb.cond2
+; Verify that we branch on cond2 without checking tobool again.
+; CHECK: call void @f2()
+; CHECK-NEXT: icmp eq i32 %cond2, 0
+; CHECK-NEXT: label %exit, label %bb.f4
+
+bb.cond2:
+ %tobool1 = icmp eq i32 %cond2, 0
+ br i1 %tobool1, label %exit, label %bb.cond1again
+
+; Verify that we eliminate this basic block.
+; CHECK-NOT: bb.cond1again:
+bb.cond1again:
+ br i1 %tobool, label %bb.f3, label %bb.f4
+
+bb.f3:
+ call void @f3()
+ br label %exit
+
+bb.f4:
+ call void @f4()
+ br label %exit
+
+exit:
+ ret void
+}
+
+declare void @f1() local_unnamed_addr
+
+declare void @f2() local_unnamed_addr
+
+declare void @f3() local_unnamed_addr
+
+declare void @f4() local_unnamed_addr
diff --git a/llvm/test/Transforms/JumpThreading/thread-two-bbs3.ll b/llvm/test/Transforms/JumpThreading/thread-two-bbs3.ll
new file mode 100644
index 000000000000..50d5d42afd77
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/thread-two-bbs3.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -jump-threading -S -verify | FileCheck %s
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.16.27026"
+
+; Verify that we do *not* thread any edge. On Windows, we used to
+; improperly duplicate EH pads like bb_cleanup below, resulting in an
+; assertion failure later down the pass pipeline.
+define void @foo([2 x i8]* %0) personality i8* bitcast (i32 ()* @baz to i8*) {
+; CHECK-LABEL: @foo
+; CHECK-NOT: bb_{{[^ ]*}}.thread:
+entry:
+ invoke void @bar()
+ to label %bb_invoke unwind label %bb_cleanuppad
+
+bb_invoke:
+ invoke void @bar()
+ to label %bb_exit unwind label %bb_cleanuppad
+
+bb_cleanuppad:
+ %index = phi i64 [ 1, %bb_invoke ], [ 0, %entry ]
+ %cond1 = phi i1 [ false, %bb_invoke ], [ true, %entry ]
+ %1 = cleanuppad within none []
+ br i1 %cond1, label %bb_action, label %bb_cleanupret
+
+bb_action:
+ %cond2 = icmp eq i64 %index, 0
+ br i1 %cond2, label %bb_cleanupret, label %bb_exit
+
+bb_exit:
+ call void @bar()
+ ret void
+
+bb_cleanupret:
+ cleanupret from %1 unwind to caller
+}
+
+declare void @bar()
+declare i32 @baz()
More information about the llvm-commits
mailing list