[llvm] r345387 - [SimpleLoopUnswitch] Unswitch by experimental.guard intrinsics

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 26 07:20:11 PDT 2018


Author: mkazantsev
Date: Fri Oct 26 07:20:11 2018
New Revision: 345387

URL: http://llvm.org/viewvc/llvm-project?rev=345387&view=rev
Log:
[SimpleLoopUnswitch] Unswitch by experimental.guard intrinsics

This patch adds support of `llvm.experimental.guard` intrinsics to non-trivial
simple loop unswitching. These intrinsics represent implicit control flow which
has pretty much the same semantics as usual conditional branches. The
algorithm of dealing with them is following:

- Consider guards as unswitching candidates;
- If a guard is considered the best candidate, turn it into a branch;
- Apply normal unswitching algorithm on this branch.

The patch has no compile time effect on code that does not contain any guards.

Differential Revision: https://reviews.llvm.org/D53744
Reviewed By: chandlerc

Added:
    llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp?rev=345387&r1=345386&r2=345387&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp Fri Oct 26 07:20:11 2018
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -59,6 +60,7 @@ using namespace llvm;
 
 STATISTIC(NumBranches, "Number of branches unswitched");
 STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
 STATISTIC(NumTrivial, "Number of unswitches that are trivial");
 
 static cl::opt<bool> EnableNonTrivialUnswitch(
@@ -70,6 +72,11 @@ static cl::opt<int>
     UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
                       cl::desc("The cost threshold for unswitching a loop."));
 
+static cl::opt<bool> UnswitchGuards(
+    "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
+    cl::desc("If enabled, simple loop unswitching will also consider "
+             "llvm.experimental.guard intrinsics as unswitch candidates."));
+
 /// Collect all of the loop invariant input values transitively used by the
 /// homogeneous instruction graph from a given root.
 ///
@@ -2169,6 +2176,77 @@ computeDomSubtreeCost(DomTreeNode &N,
   return Cost;
 }
 
+/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
+/// making the following replacement:
+///
+///   <code before guard>
+///   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+///   <code after guard>
+///
+/// into
+///
+///   <code before guard>
+///   br i1 %cond, label %guarded, label %deopt
+///
+/// guarded:
+///   <code after guard>
+///
+/// deopt:
+///   call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+///   unreachable
+///
+/// It also makes all relevant DT and LI updates, so that all structures are in
+/// valid state after this transform.
+static BranchInst *
+turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
+                    SmallVectorImpl<BasicBlock *> &ExitBlocks,
+                    DominatorTree &DT, LoopInfo &LI) {
+  SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
+  LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n");
+  BasicBlock *CheckBB = GI->getParent();
+
+  // Remove all CheckBB's successors from DomTree. A block can be seen among
+  // successors more than once, but for DomTree it should be added only once.
+  SmallPtrSet<BasicBlock *, 4> Successors;
+  for (auto *Succ : successors(CheckBB))
+    if (Successors.insert(Succ).second)
+      DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
+
+  Instruction *DeoptBlockTerm =
+      SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
+  BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
+  // SplitBlockAndInsertIfThen inserts control flow that branches to
+  // DeoptBlockTerm if the condition is true.  We want the opposite.
+  CheckBI->swapSuccessors();
+
+  BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
+  GuardedBlock->setName("guarded");
+  CheckBI->getSuccessor(1)->setName("deopt");
+
+  // We now have a new exit block.
+  ExitBlocks.push_back(CheckBI->getSuccessor(1));
+
+  GI->moveBefore(DeoptBlockTerm);
+  GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
+
+  // Add new successors of CheckBB into DomTree.
+  for (auto *Succ : successors(CheckBB))
+    DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
+
+  // Now the blocks that used to be CheckBB's successors are GuardedBlock's
+  // successors.
+  for (auto *Succ : Successors)
+    DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
+
+  // Make proper changes to DT.
+  DT.applyUpdates(DTUpdates);
+  // Inform LI of a new loop block.
+  L.addBasicBlockToLoop(GuardedBlock, LI);
+
+  ++NumGuards;
+  return CheckBI;
+}
+
 static bool
 unswitchBestCondition(Loop &L, DominatorTree &DT, LoopInfo &LI,
                       AssumptionCache &AC, TargetTransformInfo &TTI,
@@ -2178,10 +2256,29 @@ unswitchBestCondition(Loop &L, Dominator
   // loop which would be handled when visiting that inner loop).
   SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
       UnswitchCandidates;
+
+  // Whether or not we should also collect guards in the loop.
+  bool CollectGuards = false;
+  if (UnswitchGuards) {
+    auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
+        Intrinsic::getName(Intrinsic::experimental_guard));
+    if (GuardDecl && !GuardDecl->use_empty())
+      CollectGuards = true;
+  }
+
   for (auto *BB : L.blocks()) {
     if (LI.getLoopFor(BB) != &L)
       continue;
 
+    if (CollectGuards)
+      for (auto &I : *BB)
+        if (isGuard(&I)) {
+          auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
+          // TODO: Support AND, OR conditions and partial unswitching.
+          if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
+            UnswitchCandidates.push_back({&I, {Cond}});
+        }
+
     if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
       // We can only consider fully loop-invariant switch conditions as we need
       // to completely eliminate the switch after unswitching.
@@ -2346,9 +2443,12 @@ unswitchBestCondition(Loop &L, Dominator
     // Now scale the cost by the number of unique successors minus one. We
     // subtract one because there is already at least one copy of the entire
     // loop. This is computing the new cost of unswitching a condition.
-    assert(Visited.size() > 1 &&
+    // Note that guards always have 2 unique successors that are implicit and
+    // will be materialized if we decide to unswitch it.
+    int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
+    assert(SuccessorsCount > 1 &&
            "Cannot unswitch a condition without multiple distinct successors!");
-    return Cost * (Visited.size() - 1);
+    return Cost * (SuccessorsCount - 1);
   };
   Instruction *BestUnswitchTI = nullptr;
   int BestUnswitchCost;
@@ -2375,6 +2475,11 @@ unswitchBestCondition(Loop &L, Dominator
     return false;
   }
 
+  // If the best candidate is a guard, turn it into a branch.
+  if (isGuard(BestUnswitchTI))
+    BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
+                                         ExitBlocks, DT, LI);
+
   LLVM_DEBUG(dbgs() << "  Unswitching non-trivial (cost = "
                     << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
                     << "\n");

Added: llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll?rev=345387&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll (added)
+++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/guards.ll Fri Oct 26 07:20:11 2018
@@ -0,0 +1,238 @@
+; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
+; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -simple-loop-unswitch-guards -S < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_simple_case(i1 %cond, i32 %N) {
+; CHECK-LABEL: @test_simple_case(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_US:%.*]]
+; CHECK:       loop.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US]]
+; CHECK:       guarded.us:
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+;
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
+; CHECK-LABEL: @test_two_guards(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br i1 [[COND2:%.*]], label [[ENTRY_SPLIT_US_SPLIT_US:%.*]], label [[ENTRY_SPLIT_US_SPLIT:%.*]]
+; CHECK:       entry.split.us.split.us:
+; CHECK-NEXT:    br label [[LOOP_US_US:%.*]]
+; CHECK:       loop.us.us:
+; CHECK-NEXT:    [[IV_US_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US_SPLIT_US]] ], [ [[IV_NEXT_US_US:%.*]], [[GUARDED_US2:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US_US:%.*]]
+; CHECK:       guarded.us.us:
+; CHECK-NEXT:    br label [[GUARDED_US2]]
+; CHECK:       guarded.us2:
+; CHECK-NEXT:    [[IV_NEXT_US_US]] = add i32 [[IV_US_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US_US:%.*]] = icmp slt i32 [[IV_NEXT_US_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US_US]], label [[LOOP_US_US]], label [[EXIT_SPLIT_US_SPLIT_US:%.*]]
+; CHECK:       deopt1:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_conditional_guards(i1 %cond, i32 %N) {
+; CHECK-LABEL: @test_conditional_guards(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_US:%.*]]
+; CHECK:       loop.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[BACKEDGE_US:%.*]] ]
+; CHECK-NEXT:    [[CONDITION_US:%.*]] = icmp eq i32 [[IV_US]], 123
+; CHECK-NEXT:    br i1 [[CONDITION_US]], label [[GUARD_US:%.*]], label [[BACKEDGE_US]]
+; CHECK:       guard.us:
+; CHECK-NEXT:    br label [[GUARDED_US:%.*]]
+; CHECK:       backedge.us:
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[CONDITION:%.*]] = icmp eq i32 [[IV]], 123
+; CHECK-NEXT:    br i1 [[CONDITION]], label [[GUARD:%.*]], label [[BACKEDGE]]
+; CHECK:       guard:
+; CHECK-NEXT:    br label [[DEOPT:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       backedge:
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label %loop, label [[EXIT_SPLIT:%.*]]
+;
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]
+  %condition = icmp eq i32 %iv, 123
+  br i1 %condition, label %guard, label %backedge
+
+guard:
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  br label %backedge
+
+backedge:
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_nested_loop(i1 %cond, i32 %N) {
+; CHECK-LABEL: @test_nested_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[ENTRY_SPLIT:%.*]], label [[OUTER_LOOP_SPLIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[OUTER_LOOP:%.*]]
+; CHECK:       outer_loop:
+; CHECK-NEXT:    br label [[OUTER_LOOP_SPLIT_US:%.*]]
+; CHECK:       outer_loop.split.us:
+; CHECK-NEXT:    br label [[LOOP_US:%.*]]
+; CHECK:       loop.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[OUTER_LOOP_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US]]
+; CHECK:       guarded.us:
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    [[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[LOOP_COND_US]], label [[LOOP_US]], label [[OUTER_BACKEDGE_SPLIT_US:%.*]]
+; CHECK:       outer_backedge.split.us:
+; CHECK-NEXT:    br label [[OUTER_BACKEDGE:%.*]]
+; CHECK:       deopt:
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:       outer_backedge:
+; CHECK-NEXT:    br i1 false, label [[OUTER_LOOP]], label [[EXIT:%.*]]
+;
+
+entry:
+  br label %outer_loop
+
+outer_loop:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %outer_loop ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %iv.next = add i32 %iv, 1
+  %loop.cond = icmp slt i32 %iv.next, %N
+  br i1 %loop.cond, label %loop, label %outer_backedge
+
+outer_backedge:
+  br i1 undef, label %outer_loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @test_sibling_loops(i1 %cond1, i1 %cond2, i32 %N) {
+; CHECK-LABEL: @test_sibling_loops(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:         [[IV1_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV1_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US]]
+; CHECK:         call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+; CHECK:         [[IV2_US:%.*]] = phi i32 [ 0, [[BETWEEN:%.*]] ], [ [[IV1_NEXT_US2:%.*]], [[GUARDED_US2:%.*]] ]
+; CHECK-NEXT:    br label [[GUARDED_US2]]
+; CHECK:         call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT:    unreachable
+;
+
+entry:
+  br label %loop1
+
+loop1:
+  %iv1 = phi i32 [ 0, %entry ], [ %iv1.next, %loop1 ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+  %iv1.next = add i32 %iv1, 1
+  %loop1.cond = icmp slt i32 %iv1.next, %N
+  br i1 %loop1.cond, label %loop1, label %between
+
+between:
+  br label %loop2
+
+loop2:
+  %iv2 = phi i32 [ 0, %between ], [ %iv2.next, %loop2 ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
+  %iv2.next = add i32 %iv2, 1
+  %loop2.cond = icmp slt i32 %iv2.next, %N
+  br i1 %loop2.cond, label %loop2, label %exit
+
+exit:
+  ret void
+}
+
+; Check that we don't do anything because of cleanuppad.
+; CHECK-LABEL: @test_cleanuppad(
+; CHECK:       call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+; CHECK-NOT:   call void (i1, ...) @llvm.experimental.guard(
+define void @test_cleanuppad(i1 %cond, i32 %N) personality i32 (...)* @__CxxFrameHandler3 {
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  %iv.next = add i32 %iv, 1
+  invoke void @may_throw(i32 %iv) to label %loop unwind label %exit
+
+exit:
+  %cp = cleanuppad within none []
+  cleanupret from %cp unwind to caller
+
+}
+
+declare void @may_throw(i32 %i)
+declare i32 @__CxxFrameHandler3(...)




More information about the llvm-commits mailing list