[llvm] r358681 - [SDA] Bug fix: Use IPD outside the loop as divergence bound

Nicolai Haehnle via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 18 09:17:36 PDT 2019


Author: nha
Date: Thu Apr 18 09:17:35 2019
New Revision: 358681

URL: http://llvm.org/viewvc/llvm-project?rev=358681&view=rev
Log:
[SDA] Bug fix: Use IPD outside the loop as divergence bound

Summary:
The immediate post dominator of the loop header may itself be part of the divergent loop.
Since this /was/ the divergence propagation bound, the SDA would not detect joins of divergent paths outside the loop. The bound is now the nearest post dominator of the loop header that lies outside the loop.

Reviewers: nhaehnle

Reviewed By: nhaehnle

Subscribers: mmasten, arsenm, jvesely, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59042

Modified:
    llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp
    llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll

Modified: llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp?rev=358681&r1=358680&r2=358681&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp (original)
+++ llvm/trunk/lib/Analysis/SyncDependenceAnalysis.cpp Thu Apr 18 09:17:35 2019
@@ -218,14 +218,9 @@ struct DivergencePropagator {
   template <typename SuccessorIterable>
   std::unique_ptr<ConstBlockSet>
   computeJoinPoints(const BasicBlock &RootBlock,
-                    SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+                    SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
     assert(JoinBlocks);
 
-    // immediate post dominator (no join block beyond that block)
-    const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
-    const auto *IpdNode = PdNode->getIDom();
-    const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
     // bootstrap with branch targets
     for (const auto *SuccBlock : NodeSuccessors) {
       DefMap.emplace(SuccBlock, SuccBlock);
@@ -340,13 +335,23 @@ const ConstBlockSet &SyncDependenceAnaly
 
   // already available in cache?
   auto ItCached = CachedLoopExitJoins.find(&Loop);
-  if (ItCached != CachedLoopExitJoins.end())
+  if (ItCached != CachedLoopExitJoins.end()) {
     return *ItCached->second;
+  }
+
+  // don't propagate beyond the immediate post dominator of the loop
+  const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
+  const auto *IpdNode = PdNode->getIDom();
+  const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+  while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
+    IpdNode = IpdNode->getIDom();
+    PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+  }
 
   // compute all join points
   DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
   auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
-      *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+      *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
 
   auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
   assert(ItInserted.second);
@@ -365,11 +370,16 @@ SyncDependenceAnalysis::join_blocks(cons
   if (ItCached != CachedBranchJoins.end())
     return *ItCached->second;
 
+  // don't propagate beyond the immediate post dominator of the branch
+  const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
+  const auto *IpdNode = PdNode->getIDom();
+  const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
   // compute all join points
   DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
   const auto &TermBlock = *Term.getParent();
   auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
-      TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+      TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
 
   auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
   assert(ItInserted.second);

Modified: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll?rev=358681&r1=358680&r2=358681&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll (original)
+++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/hidden_diverge.ll Thu Apr 18 09:17:35 2019
@@ -21,6 +21,43 @@ merge:
   ret void
 }
 
+define amdgpu_kernel void @hidden_loop_ipd(i32 %n, i32 %a, i32 %b) #0 {
+; CHECK-LABEL: Printing analysis 'Legacy Divergence Analysis' for function 'hidden_loop_ipd'
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %cond.var = icmp slt i32 %tid, 0
+; CHECK: DIVERGENT: %cond.var = icmp
+  %cond.uni = icmp slt i32 %n, 0
+; CHECK-NOT: DIVERGENT: %cond.uni = icmp
+  br label %for.header
+for.header:
+  br i1 %cond.var, label %A, label %B
+A:
+  br label %C
+B:
+  br label %C
+C:
+  br i1 %cond.uni, label %E, label %D
+D:
+  br i1 %cond.var, label %for.header, label %F
+
+E:
+  %e.lcssa.uni = phi i32 [ 0, %C ]
+; CHECK-NOT: DIVERGENT: %e.lcssa.uni = phi i32
+  br label %G
+
+F:
+  %f.lcssa.uni = phi i32 [ 1, %D ]
+; CHECK-NOT: DIVERGENT: %f.lcssa.uni = phi i32
+  br label %G
+
+G:
+  %g.join.var = phi i32 [ %e.lcssa.uni, %E ], [ %f.lcssa.uni, %F ]
+; CHECK: DIVERGENT: %g.join.var = phi i32
+  ret void
+}
+
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }




More information about the llvm-commits mailing list