[llvm] r331094 - [LoopGuardWidening] Make PostDomTree optional

Fri Apr 27 16:15:57 PDT 2018

Author: reames
Date: Fri Apr 27 16:15:56 2018
New Revision: 331094

URL: http://llvm.org/viewvc/llvm-project?rev=331094&view=rev
Log:
[LoopGuardWidening] Make PostDomTree optional

The effect of doing so is not disrupting the LoopPassManager when mixing this pass with other loop passes.  This should help locality of access substaintially and avoids the cost of computing PostDom.

The assumption here is that the full GuardWidening (which does use PostDom) is run as a canonicalization before loop opts and that this version is just catching cases exposed by other loop passes.  (i.e. LoopPredication, IndVarSimplify, LoopUnswitch, etc..)


Modified:
    llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp
    llvm/trunk/test/Transforms/GuardWidening/loop-schedule.ll

Modified: llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp?rev=331094&r1=331093&r2=331094&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GuardWidening.cpp Fri Apr 27 16:15:56 2018
@@ -65,7 +65,7 @@ namespace {
 
 class GuardWideningImpl {
   DominatorTree &DT;
-  PostDominatorTree &PDT;
+  PostDominatorTree *PDT;
   LoopInfo &LI;
 
   /// Together, these describe the region of interest.  This might be all of
@@ -214,7 +214,7 @@ class GuardWideningImpl {
 
 public:
 
-  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT,
+  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                              LoopInfo &LI, DomTreeNode *Root,
                              std::function<bool(BasicBlock*)> BlockFilter)
     : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {}
@@ -353,9 +353,8 @@ GuardWideningImpl::WideningScore GuardWi
   // 2) we have the extra cost of computing the guard condition in the common
   // case.  At the moment, we really only consider the second in our heuristic
   // here.  TODO: evaluate cost model for spurious deopt
-  bool HoistingOutOfIf =
-      !PDT.dominates(DominatedGuard->getParent(), DominatingGuard->getParent());
-
+  // NOTE: As written, this also lets us hoist right over another guard which
+  // is essentially just another spelling for control flow.  
   if (isWideningCondProfitable(DominatedGuard->getArgOperand(0),
                                DominatingGuard->getArgOperand(0)))
     return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;
@@ -363,7 +362,26 @@ GuardWideningImpl::WideningScore GuardWi
   if (HoistingOutOfLoop)
     return WS_Positive;
 
-  return HoistingOutOfIf ? WS_IllegalOrNegative : WS_Neutral;
+  // Returns true if we might be hoisting above explicit control flow.  Note
+  // that this completely ignores implicit control flow (guards, calls which
+  // throw, etc...).  That choice appears arbitrary.
+  auto MaybeHoistingOutOfIf = [&]() {
+    auto *DominatingBlock = DominatingGuard->getParent();
+    auto *DominatedBlock = DominatedGuard->getParent();
+    
+    // Same Block?
+    if (DominatedBlock == DominatingBlock)
+      return false;
+    // Obvious successor (common loop header/preheader case)
+    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
+      return false;
+    // TODO: diamond, triangle cases
+    if (!PDT) return true;
+    return !PDT->dominates(DominatedGuard->getParent(),
+                           DominatingGuard->getParent());
+  };
+
+  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
 }
 
 bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc,
@@ -671,7 +689,7 @@ PreservedAnalyses GuardWideningPass::run
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &LI = AM.getResult<LoopAnalysis>(F);
   auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
-  if (!GuardWideningImpl(DT, PDT, LI, DT.getRootNode(),
+  if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
                          [](BasicBlock*) { return true; } ).run())
     return PreservedAnalyses::all();
 
@@ -694,7 +712,7 @@ struct GuardWideningLegacyPass : public
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-    return GuardWideningImpl(DT, PDT, LI, DT.getRootNode(),
+    return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
                          [](BasicBlock*) { return true; } ).run();
   }
 
@@ -720,7 +738,8 @@ struct LoopGuardWideningLegacyPass : pub
       return false;
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
+    auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
     BasicBlock *RootBB = L->getLoopPredecessor();
     if (!RootBB)
       RootBB = L->getHeader();
@@ -734,7 +753,6 @@ struct LoopGuardWideningLegacyPass : pub
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     getLoopAnalysisUsage(AU);
-    AU.addRequired<PostDominatorTreeWrapperPass>();
     AU.addPreserved<PostDominatorTreeWrapperPass>();
   }
 };

Modified: llvm/trunk/test/Transforms/GuardWidening/loop-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GuardWidening/loop-schedule.ll?rev=331094&r1=331093&r2=331094&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/GuardWidening/loop-schedule.ll (original)
+++ llvm/trunk/test/Transforms/GuardWidening/loop-schedule.ll Fri Apr 27 16:15:56 2018
@@ -3,13 +3,8 @@
 ; Main point of this test is to check the scheduling -- there should be
 ; no analysis passes needed between LICM and LoopGuardWidening
 
-; TODO: Because guard widdening currently requires post-dom, we end up
-; breaking the loop pass manager to compute it.  Need to either make all
-; loop passes preserve postdom (hard) or make it optional in guard widdening
 ; CHECK: Loop Pass Manager
 ; CHECK:   Loop Invariant Code Motion
-; CHECK: Post-Dominator Tree Construction
-; CHECK: Loop Pass Manager
 ; CHECK:   Widen guards (within a single loop, as a loop pass)
 ; CHECK:   Loop Invariant Code Motion
 
@@ -21,6 +16,7 @@ define void @iter(i32 %a, i32 %b, i1* %c
 ; CHECK:  %cond_1 = icmp ult i32 %b, 10
 ; CHECK:  %wide.chk = and i1 %cond_0, %cond_1
 ; CHECK:  call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK-LABEL: loop:
 
 entry:
   %cond_0 = icmp ult i32 %a, 10
@@ -39,3 +35,30 @@ leave.loopexit:
 leave:                                            ; preds = %leave.loopexit, %entry
   ret void
 }
+
+define void @within_loop(i32 %a, i32 %b, i1* %c_p) {
+; CHECK-LABEL @within_loop
+; CHECK:  %cond_0 = icmp ult i32 %a, 10
+; CHECK:  %cond_1 = icmp ult i32 %b, 10
+; CHECK:  %wide.chk = and i1 %cond_0, %cond_1
+; CHECK-LABEL: loop:
+; CHECK:  call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+
+entry:
+  br label %loop
+
+loop:                                             ; preds = %loop.preheader, %loop
+  %cond_0 = icmp ult i32 %a, 10
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+  %cond_1 = icmp ult i32 %b, 10
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+  %cnd = load i1, i1* %c_p
+  br i1 %cnd, label %loop, label %leave.loopexit
+
+leave.loopexit:                                   ; preds = %loop
+  br label %leave
+
+leave:                                            ; preds = %leave.loopexit, %entry
+  ret void
+}
+