[llvm] 8a95962 - [LoopPeel] Peel loops with deoptimizing exits

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 7 20:32:53 PDT 2021


Author: Max Kazantsev
Date: 2021-10-08T10:32:13+07:00
New Revision: 8a959625c433f311233682afa7bfe1c76367700d

URL: https://github.com/llvm/llvm-project/commit/8a959625c433f311233682afa7bfe1c76367700d
DIFF: https://github.com/llvm/llvm-project/commit/8a959625c433f311233682afa7bfe1c76367700d.diff

LOG: [LoopPeel] Peel loops with deoptimizing exits

Added support for peeling loops with "deoptimizing" exits -
such exits that it or any of its children (or any of their
children, etc) either has a @llvm.experimental.deoptimize call
prior to the terminating return instruction of this basic block
or is terminated with unreachable. All blocks in the the
sequence must have a single successor, maybe except for the last
one.

Previously we only checked the exit block for being deoptimizing.
Now we check if the last reachable block from the exit is deoptimizing.

Patch by Dmitry Makogon!

Differential Revision: https://reviews.llvm.org/D110922
Reviewed By: mkazantsev

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
    llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
    llvm/lib/Transforms/Utils/LoopPeel.cpp
    llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 4bea4c237771e..52a3fcc23b37b 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
 /// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc.
 void ReplaceInstWithInst(Instruction *From, Instruction *To);
 
+/// Check if we can prove that all paths starting from this block converge
+/// to a block that either has a @llvm.experimental.deoptimize call
+/// prior to its terminating return instruction or is terminated by unreachable.
+/// All blocks in the traversed sequence must have a single predecessor, maybe
+/// except for the last one.
+bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB);
+
 /// Option class for critical edge splitting.
 ///
 /// This provides a builder interface for overriding the default options used

diff  --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 1a07697009eaa..de6c2af61e7d8 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -52,6 +53,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "basicblock-utils"
 
+static cl::opt<unsigned> MaxDeoptimizingCheckDepth(
+    "max-deopt-check-depth", cl::init(8), cl::Hidden,
+    cl::desc("Set the maximum path length when checking whether a basic block "
+             "is deoptimizing"));
+
 void llvm::DetatchDeadBlocks(
     ArrayRef<BasicBlock *> BBs,
     SmallVectorImpl<DominatorTree::UpdateType> *Updates,
@@ -485,6 +491,20 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
   BI = New;
 }
 
+bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
+  // Remember visited blocks to avoid infinite loop
+  SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
+  unsigned Depth = 0;
+  while (BB && Depth++ < MaxDeoptimizingCheckDepth &&
+         VisitedBlocks.insert(BB).second) {
+    if (BB->getTerminatingDeoptimizeCall() ||
+        isa<UnreachableInst>(BB->getTerminator()))
+      return true;
+    BB = BB->getSingleSuccessor();
+  }
+  return false;
+}
+
 void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
   BasicBlock::iterator BI(From);
   ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);

diff  --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index a6cdce1f4b8f0..41b4ca7d0074b 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -102,15 +102,15 @@ bool llvm::canPeel(Loop *L) {
   SmallVector<BasicBlock *, 4> Exits;
   L->getUniqueNonLatchExitBlocks(Exits);
   // The latch must either be the only exiting block or all non-latch exit
-  // blocks have either a deopt or unreachable terminator. Both deopt and
-  // unreachable terminators are a strong indication they are not taken. Note
-  // that this is a profitability check, not a legality check. Also note that
-  // LoopPeeling currently can only update the branch weights of latch blocks
-  // and branch weights to blocks with deopt or unreachable do not need
+  // blocks have either a deopt or unreachable terminator or compose a chain of
+  // blocks where the last one is either deopt or unreachable terminated. Both
+  // deopt and unreachable terminators are a strong indication they are not
+  // taken. Note that this is a profitability check, not a legality check. Also
+  // note that LoopPeeling currently can only update the branch weights of latch
+  // blocks and branch weights to blocks with deopt or unreachable do not need
   // updating.
-  return all_of(Exits, [](const BasicBlock *BB) {
-    return BB->getTerminatingDeoptimizeCall() ||
-           isa<UnreachableInst>(BB->getTerminator());
+  return all_of(Exits, [&](const BasicBlock *BB) {
+    return IsBlockFollowedByDeoptOrUnreachable(BB);
   });
 }
 
@@ -667,37 +667,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
   L->getExitEdges(ExitEdges);
 
-  DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
-  if (DT) {
-    // We'd like to determine the idom of exit block after peeling one
-    // iteration.
-    // Let Exit is exit block.
-    // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
-    // blocks.
-    // Let Latch' and ExitingSet' are copies after a peeling.
-    // We'd like to find an idom'(Exit) - idom of Exit after peeling.
-    // It is an evident that idom'(Exit) will be the nearest common dominator
-    // of ExitingSet and ExitingSet'.
-    // idom(Exit) is a nearest common dominator of ExitingSet.
-    // idom(Exit)' is a nearest common dominator of ExitingSet'.
-    // Taking into account that we have a single Latch, Latch' will dominate
-    // Header and idom(Exit).
-    // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
-    // All these basic blocks are in the same loop, so what we find is
-    // (nearest common dominator of idom(Exit) and Latch)'.
-    // In the loop below we remember nearest common dominator of idom(Exit) and
-    // Latch to update idom of Exit later.
-    assert(L->hasDedicatedExits() && "No dedicated exits?");
-    for (auto Edge : ExitEdges) {
-      if (ExitIDom.count(Edge.second))
-        continue;
-      BasicBlock *BB = DT->findNearestCommonDominator(
-          DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
-      assert(L->contains(BB) && "IDom is not in a loop");
-      ExitIDom[Edge.second] = BB;
-    }
-  }
-
   Function *F = Header->getParent();
 
   // Set up all the necessary basic blocks. It is convenient to split the
@@ -769,6 +738,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
   identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
 
+  SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
+
   // For each peeled-off iteration, make a copy of the loop.
   for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
     SmallVector<BasicBlock *, 8> NewBlocks;
@@ -782,14 +753,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
     // previous one.
     remapInstructionsInBlocks(NewBlocks, VMap);
 
+    // If DT is available, insert edges from cloned exiting blocks to the exits
     if (DT) {
-      // Latches of the cloned loops dominate over the loop exit, so idom of the
-      // latter is the first cloned loop body, as original PreHeader dominates
-      // the original loop body.
-      if (Iter == 0)
-        for (auto Exit : ExitIDom)
-          DT->changeImmediateDominator(Exit.first,
-                                       cast<BasicBlock>(LVMap[Exit.second]));
+      for (auto Exit : ExitEdges)
+        DTUpdates.push_back(
+            {DT->Insert, cast<BasicBlock>(LVMap[Exit.first]), Exit.second});
 #ifdef EXPENSIVE_CHECKS
       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
 #endif
@@ -836,6 +804,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
   // We modified the loop, update SE.
   SE->forgetTopmostLoop(L);
 
+  DT->applyUpdates(DTUpdates);
+
   // Finally DomtTree must be correct.
   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
 

diff  --git a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll
index 98c78ff87f87c..bf264aac3a49e 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll
@@ -193,28 +193,56 @@ unreachable.exit:
 define void @peel_exits_to_blocks_branch_to_unreachable_block(i32* %ptr, i32 %N, i32 %x, i1 %c.1) {
 ; CHECK-LABEL: @peel_exits_to_blocks_branch_to_unreachable_block(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER_PEEL_BEGIN:%.*]]
+; CHECK:       loop.header.peel.begin:
+; CHECK-NEXT:    br label [[LOOP_HEADER_PEEL:%.*]]
+; CHECK:       loop.header.peel:
+; CHECK-NEXT:    [[C_PEEL:%.*]] = icmp ult i32 1, 2
+; CHECK-NEXT:    br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]]
+; CHECK:       else.peel:
+; CHECK-NEXT:    [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]]
+; CHECK-NEXT:    br i1 [[C_2_PEEL]], label [[EXIT_2:%.*]], label [[LOOP_LATCH_PEEL:%.*]]
+; CHECK:       then.peel:
+; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH_PEEL]]
+; CHECK:       loop.latch.peel:
+; CHECK-NEXT:    [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ]
+; CHECK-NEXT:    [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT:    store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4
+; CHECK-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1
+; CHECK-NEXT:    [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000
+; CHECK-NEXT:    br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop.header.peel.next:
+; CHECK-NEXT:    br label [[LOOP_HEADER_PEEL_NEXT1:%.*]]
+; CHECK:       loop.header.peel.next1:
+; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK:       entry.peel.newph:
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], 2
-; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]]
 ; CHECK:       then:
-; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT:    br i1 [[C_1]], label [[EXIT_1_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
 ; CHECK:       else:
-; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]]
-; CHECK-NEXT:    br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]]
+; CHECK-NEXT:    br i1 [[C_2]], label [[EXIT_2_LOOPEXIT:%.*]], label [[LOOP_LATCH]]
 ; CHECK:       loop.latch:
 ; CHECK-NEXT:    [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]]
 ; CHECK-NEXT:    store i32 [[M]], i32* [[GEP]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[C_3:%.*]] = icmp ult i32 [[IV]], 1000
-; CHECK-NEXT:    br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; CHECK-NEXT:    br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       exit.loopexit:
+; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
+; CHECK:       exit.1.loopexit:
+; CHECK-NEXT:    br label [[EXIT_1]]
 ; CHECK:       exit.1:
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[UNREACHABLE_TERM:%.*]]
+; CHECK:       exit.2.loopexit:
+; CHECK-NEXT:    br label [[EXIT_2]]
 ; CHECK:       exit.2:
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label [[UNREACHABLE_TERM]]


        


More information about the llvm-commits mailing list