[llvm] r358681 - [SDA] Bug fix: Use IPD outside the loop as divergence bound
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 18 09:17:36 PDT 2019
Author: nha
Date: Thu Apr 18 09:17:35 2019
New Revision: 358681
URL: http://llvm.org/viewvc/llvm-project?rev=358681&view=rev
Log:
[SDA] Bug fix: Use IPD outside the loop as divergence bound
Summary:
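The immediate post-dominator (IPD) of the loop header may itself be part of the divergent loop.
Because that IPD was previously used as the divergence propagation bound, the SDA would not detect joins of divergent paths outside the loop.
The bound is now the closest post-dominator of the loop header that lies outside the loop.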
Reviewers: nhaehnle
Reviewed By: nhaehnle
Subscribers: mmasten, arsenm, jvesely, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59042
Modified:
llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp
llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
Modified: llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp?rev=358681&r1=358680&r2=358681&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp Thu Apr 18 09:17:35 2019
@@ -218,14 +218,9 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
assert(JoinBlocks);
- // immediate post dominator (no join block beyond that block)
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@@ -340,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnaly
// already available in cache?
auto ItCached = CachedLoopExitJoins.find(&Loop);
- if (ItCached != CachedLoopExitJoins.end())
+ if (ItCached != CachedLoopExitJoins.end()) {
return *ItCached->second;
+ }
+
+ // dont propagte beyond the immediate post dom of the loop
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+ while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
+ IpdNode = IpdNode->getIDom();
+ PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+ }
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@@ -365,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(cons
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
+ // dont propagate beyond the immediate post dominator of the branch
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
Modified: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll?rev=358681&r1=358680&r2=358681&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll (original)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll Thu Apr 18 09:17:35 2019
@@ -21,6 +21,43 @@ merge:
ret void
}
+define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %cond.var = icmp slt i32 %tid, 0
+; CHECK: DIVERGENT: %cond.var = icmp
+ %cond.uni = icmp slt i32 %n, 0
+; CHECK-NOT: DIVERGENT: %cond.uni = icmp
+ br label %for.header
+for.header:
+ br i1 %cond.var, label %A, label %B
+A:
+ br label %C
+B:
+ br label %C
+C:
+ br i1 %cond.uni, label %E, label %D
+D:
+ br i1 %cond.var, label %for.header, label %F
+
+E:
+ %e.lcssa.uni = phi i32 [ 0, %C ]
+; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32
+ br label %G
+
+F:
+ %f.lcssa.uni = phi i32 [ 1, %D ]
+; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32
+ br label %G
+
+G:
+ %g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ]
+; CHECK: DIVERGENT: %g.join.var = phi i32
+ ret void
+}
+
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
More information about the llvm-commits mailing list