[llvm-branch-commits] [llvm] bee4868 - [LoopUnswitch] Implement first version of partial unswitching.

Florian Hahn via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jan 21 01:51:57 PST 2021


Author: Florian Hahn
Date: 2021-01-21T09:46:41Z
New Revision: bee486851c1a72899bd3c0f9b38249bbe5c38951

URL: https://github.com/llvm/llvm-project/commit/bee486851c1a72899bd3c0f9b38249bbe5c38951
DIFF: https://github.com/llvm/llvm-project/commit/bee486851c1a72899bd3c0f9b38249bbe5c38951.diff

LOG: [LoopUnswitch] Implement first version of partial unswitching.

This patch applies the idea from D93734 to LoopUnswitch.

It adds support for unswitching on conditions that are only
invariant along certain paths through a loop.

In particular, it targets conditions in the loop header that
depend on values loaded from memory. If either path from
the true or false successor through the loop does not modify
memory, perform partial loop unswitching.

That is, duplicate the instructions feeding the condition in the pre-header.
Then unswitch on the duplicated condition. The condition is now known
in the unswitched version for the 'invariant' path through the original loop.

One caveat of this approach is that one of the loops created can be partially
unswitched again. To prevent this, `llvm.loop.unswitch.partial.disable`
metadata is added to the unswitched loops, which disables subsequent partial
unswitching.

If that's the approach to go, I can move the code handling the metadata kind
into separate functions.

This increases the cases we unswitch quite a bit in SPEC2006/SPEC2000 &
MultiSource. It also allows us to eliminate a dead loop in SPEC2017's omnetpp

```
Tests: 236
Same hash: 170 (filtered out)
Remaining: 66
Metric: loop-unswitch.NumBranches

Program                                        base   patch  diff
 test-suite...000/255.vortex/255.vortex.test     2.00  23.00 1050.0%
 test-suite...T2006/401.bzip2/401.bzip2.test     7.00  55.00 685.7%
 test-suite :: External/Nurbs/nurbs.test         5.00  26.00 420.0%
 test-suite...s-C/unix-smail/unix-smail.test     1.00   3.00 200.0%
 test-suite.../Prolangs-C++/ocean/ocean.test     1.00   3.00 200.0%
 test-suite...tions/lambda-0.1.3/lambda.test     1.00   3.00 200.0%
 test-suite...yApps-C++/PENNANT/PENNANT.test     2.00   5.00 150.0%
 test-suite...marks/Ptrdist/yacr2/yacr2.test     1.00   2.00 100.0%
 test-suite...lications/viterbi/viterbi.test     1.00   2.00 100.0%
 test-suite...plications/d/make_dparser.test    12.00  24.00 100.0%
 test-suite...CFP2006/433.milc/433.milc.test    14.00  27.00 92.9%
 test-suite.../Applications/lemon/lemon.test     7.00  12.00 71.4%
 test-suite...ce/Applications/Burg/burg.test     6.00  10.00 66.7%
 test-suite...T2006/473.astar/473.astar.test    16.00  26.00 62.5%
 test-suite...marks/7zip/7zip-benchmark.test    78.00 121.00 55.1%
```

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D93764

Added: 
    llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
    llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 3c1a6d118c80..2e3ab5029fd2 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -99,6 +99,12 @@ static cl::opt<unsigned>
 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
           cl::init(100), cl::Hidden);
 
+static cl::opt<unsigned>
+    MSSAThreshold("loop-unswitch-memoryssa-threshold",
+                  cl::desc("Max number of memory uses to explore during "
+                           "partial unswitching analysis"),
+                  cl::init(100), cl::Hidden);
+
 namespace {
 
   class LUAnalysisCache {
@@ -185,6 +191,7 @@ namespace {
     Loop *CurrentLoop = nullptr;
     DominatorTree *DT = nullptr;
     MemorySSA *MSSA = nullptr;
+    AAResults *AA = nullptr;
     std::unique_ptr<MemorySSAUpdater> MSSAU;
     BasicBlock *LoopHeader = nullptr;
     BasicBlock *LoopPreheader = nullptr;
@@ -249,19 +256,22 @@ namespace {
     bool tryTrivialLoopUnswitch(bool &Changed);
 
     bool unswitchIfProfitable(Value *LoopCond, Constant *Val,
-                              Instruction *TI = nullptr);
+                              Instruction *TI = nullptr,
+                              ArrayRef<Instruction *> ToDuplicate = {});
     void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val,
                                   BasicBlock *ExitBlock, Instruction *TI);
     void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L,
-                                     Instruction *TI);
+                                     Instruction *TI,
+                                     ArrayRef<Instruction *> ToDuplicate = {});
 
     void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
                                               Constant *Val, bool IsEqual);
 
-    void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
-                                        BasicBlock *TrueDest,
-                                        BasicBlock *FalseDest,
-                                        BranchInst *OldBranch, Instruction *TI);
+    void
+    emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
+                                   BasicBlock *TrueDest, BasicBlock *FalseDest,
+                                   BranchInst *OldBranch, Instruction *TI,
+                                   ArrayRef<Instruction *> ToDuplicate = {});
 
     void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L);
 
@@ -528,6 +538,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   LPM = &LPMRef;
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   if (EnableMSSALoopDependency) {
     MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
@@ -629,6 +640,129 @@ static bool equalityPropUnSafe(Value &LoopCond) {
   return false;
 }
 
+/// Check if the loop header has a conditional branch that is not
+/// loop-invariant, because it involves load instructions. If all paths from
+/// either the true or false successor to the header or loop exits do not
+/// modify the memory feeding the condition, perform 'partial unswitching'. That
+/// is, duplicate the instructions feeding the condition in the pre-header. Then
+/// unswitch on the duplicated condition. The condition is now known in the
+/// unswitched version for the 'invariant' path through the original loop.
+///
+/// If the branch condition of the header is partially invariant, return a pair
+/// containing the instructions to duplicate and a boolean Constant to update
+/// the condition in the loops created for the true or false successors.
+static std::pair<SmallVector<Instruction *, 4>, Constant *>
+hasPartialIVCondition(Loop *L, MemorySSA &MSSA, AAResults *AA) {
+  SmallVector<Instruction *, 4> ToDuplicate;
+
+  auto *TI = dyn_cast<BranchInst>(L->getHeader()->getTerminator());
+  if (!TI || !TI->isConditional())
+    return {};
+
+  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  // The case with the condition outside the loop should already be handled
+  // earlier.
+  if (!CondI || !L->contains(CondI))
+    return {};
+
+  ToDuplicate.push_back(CondI);
+
+  SmallVector<Value *, 4> WorkList;
+  WorkList.append(CondI->op_begin(), CondI->op_end());
+
+  SmallVector<MemoryAccess *, 4> AccessesToCheck;
+  SmallVector<MemoryLocation, 4> AccessedLocs;
+  while (!WorkList.empty()) {
+    Instruction *I = dyn_cast<Instruction>(WorkList.pop_back_val());
+    if (!I || !L->contains(I))
+      continue;
+
+    // TODO: support additional instructions.
+    if (!isa<LoadInst>(I) && !isa<GetElementPtrInst>(I))
+      return {};
+
+    // Do not duplicate volatile loads.
+    if (auto *LI = dyn_cast<LoadInst>(I))
+      if (LI->isVolatile())
+        return {};
+
+    ToDuplicate.push_back(I);
+    if (auto *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(I))) {
+      // Queue the defining access for the alias checks below.
+      AccessesToCheck.push_back(MemUse->getDefiningAccess());
+      AccessedLocs.push_back(MemoryLocation::get(I));
+    }
+    WorkList.append(I->op_begin(), I->op_end());
+  }
+
+  if (ToDuplicate.size() <= 1)
+    return {};
+
+  auto HasNoClobbersOnPath =
+      [L, AA, &AccessedLocs](BasicBlock *Succ, BasicBlock *Header,
+                             SmallVector<MemoryAccess *, 4> AccessesToCheck) {
+        // First, collect all blocks in the loop that are on a path from Succ
+        // to the header.
+        SmallVector<BasicBlock *, 4> WorkList;
+        WorkList.push_back(Succ);
+        WorkList.push_back(Header);
+        SmallPtrSet<BasicBlock *, 4> Seen;
+        Seen.insert(Header);
+        while (!WorkList.empty()) {
+          BasicBlock *Current = WorkList.pop_back_val();
+          if (!L->contains(Current))
+            continue;
+          const auto &SeenIns = Seen.insert(Current);
+          if (!SeenIns.second)
+            continue;
+
+          WorkList.append(succ_begin(Current), succ_end(Current));
+        }
+
+        // Next, check if there are any MemoryDefs that are on the path through
+        // the loop (in the Seen set) and they may-alias any of the locations in
+        // AccessedLocs. If that is the case, they may modify the condition and
+        // partial unswitching is not possible.
+        SmallPtrSet<MemoryAccess *, 4> SeenAccesses;
+        while (!AccessesToCheck.empty()) {
+          MemoryAccess *Current = AccessesToCheck.pop_back_val();
+          auto SeenI = SeenAccesses.insert(Current);
+          if (!SeenI.second || !Seen.contains(Current->getBlock()))
+            continue;
+
+          // Bail out if the threshold is exceeded.
+          if (SeenAccesses.size() >= MSSAThreshold)
+            return false;
+
+          // MemoryUses are read-only accesses.
+          if (isa<MemoryUse>(Current))
+            continue;
+
+          // For a MemoryDef, check if it may modify any of the locations
+          // feeding the original condition.
+          if (auto *CurrentDef = dyn_cast<MemoryDef>(Current)) {
+            if (any_of(AccessedLocs, [AA, CurrentDef](MemoryLocation &Loc) {
+                  return isModSet(
+                      AA->getModRefInfo(CurrentDef->getMemoryInst(), Loc));
+                }))
+              return false;
+          }
+
+          for (Use &U : Current->uses())
+            AccessesToCheck.push_back(cast<MemoryAccess>(U.getUser()));
+        }
+
+        return true;
+      };
+
+  if (HasNoClobbersOnPath(TI->getSuccessor(0), L->getHeader(), AccessesToCheck))
+    return {ToDuplicate, ConstantInt::getTrue(TI->getContext())};
+  if (HasNoClobbersOnPath(TI->getSuccessor(1), L->getHeader(), AccessesToCheck))
+    return {ToDuplicate, ConstantInt::getFalse(TI->getContext())};
+
+  return {};
+}
+
 /// Do actual work and unswitch loop if possible and profitable.
 bool LoopUnswitch::processCurrentLoop() {
   bool Changed = false;
@@ -828,6 +962,26 @@ bool LoopUnswitch::processCurrentLoop() {
         }
       }
   }
+
+  // Check if there is a header condition that is invariant along the path from
+  // either the true or false successors to the header. This allows unswitching
+  // conditions depending on memory accesses, if there's a path not clobbering
+  // the memory locations. Check if this transform has been disabled using
+  // metadata, to avoid unswitching the same loop multiple times.
+  if (MSSA &&
+      !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) {
+    auto ToDuplicate = hasPartialIVCondition(CurrentLoop, *MSSA, AA);
+    if (!ToDuplicate.first.empty()) {
+      ++NumBranches;
+      unswitchIfProfitable(ToDuplicate.first[0], ToDuplicate.second,
+                           CurrentLoop->getHeader()->getTerminator(),
+                           ToDuplicate.first);
+
+      RedoLoop = false;
+      return true;
+    }
+  }
+
   return Changed;
 }
 
@@ -885,7 +1039,8 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
 /// simplify the loop.  If we decide that this is profitable,
 /// unswitch the loop, reprocess the pieces, then return true.
 bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
-                                        Instruction *TI) {
+                                        Instruction *TI,
+                                        ArrayRef<Instruction *> ToDuplicate) {
   // Check to see if it would be profitable to unswitch current loop.
   if (!BranchesInfo.costAllowsUnswitching()) {
     LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
@@ -905,31 +1060,65 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
     return false;
   }
 
-  unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI);
+  unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate);
   return true;
 }
 
 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
 /// otherwise branch to FalseDest. Insert the code immediately before OldBranch
 /// and remove (but not erase!) it from the function.
-void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
-                                                  BasicBlock *TrueDest,
-                                                  BasicBlock *FalseDest,
-                                                  BranchInst *OldBranch,
-                                                  Instruction *TI) {
+void LoopUnswitch::emitPreheaderBranchOnCondition(
+    Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest,
+    BranchInst *OldBranch, Instruction *TI,
+    ArrayRef<Instruction *> ToDuplicate) {
   assert(OldBranch->isUnconditional() && "Preheader is not split correctly");
   assert(TrueDest != FalseDest && "Branch targets should be different");
+
   // Insert a conditional branch on LIC to the two preheaders.  The original
   // code is the true version and the new code is the false version.
   Value *BranchVal = LIC;
   bool Swapped = false;
-  if (!isa<ConstantInt>(Val) ||
-      Val->getType() != Type::getInt1Ty(LIC->getContext()))
-    BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
-  else if (Val != ConstantInt::getTrue(Val->getContext())) {
-    // We want to enter the new loop when the condition is true.
-    std::swap(TrueDest, FalseDest);
-    Swapped = true;
+
+  if (!ToDuplicate.empty()) {
+    ValueToValueMapTy Old2New;
+    for (Instruction *I : reverse(ToDuplicate)) {
+      auto *New = I->clone();
+      New->insertBefore(OldBranch);
+      RemapInstruction(New, Old2New,
+                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+      Old2New[I] = New;
+
+      if (MSSAU) {
+        MemorySSA *MSSA = MSSAU->getMemorySSA();
+        auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I));
+        if (!MemA)
+          continue;
+
+        Loop *L = LI->getLoopFor(I->getParent());
+        auto *DefiningAccess = MemA->getDefiningAccess();
+        // If the defining access is a MemoryPhi in the header, get the incoming
+        // value for the pre-header as defining access.
+        if (DefiningAccess->getBlock() == I->getParent()) {
+          if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
+            DefiningAccess =
+                MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+          }
+        }
+        MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
+                                      MemorySSA::BeforeTerminator);
+      }
+    }
+    BranchVal = Old2New[ToDuplicate[0]];
+  } else {
+
+    if (!isa<ConstantInt>(Val) ||
+        Val->getType() != Type::getInt1Ty(LIC->getContext()))
+      BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val);
+    else if (Val != ConstantInt::getTrue(Val->getContext())) {
+      // We want to enter the new loop when the condition is true.
+      std::swap(TrueDest, FalseDest);
+      Swapped = true;
+    }
   }
 
   // Old branch will be removed, so save its parent and successor to update the
@@ -1213,8 +1402,9 @@ void LoopUnswitch::splitExitEdges(
 /// We determined that the loop is profitable to unswitch when LIC equal Val.
 /// Split it into loop versions and test the condition outside of either loop.
 /// Return the loops created as Out1/Out2.
-void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
-                                               Loop *L, Instruction *TI) {
+void LoopUnswitch::unswitchNontrivialCondition(
+    Value *LIC, Constant *Val, Loop *L, Instruction *TI,
+    ArrayRef<Instruction *> ToDuplicate) {
   Function *F = LoopHeader->getParent();
   LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
                     << LoopHeader->getName() << " [" << L->getBlocks().size()
@@ -1346,7 +1536,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
 
   // Emit the new branch that selects between the two versions of this loop.
   emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR,
-                                 TI);
+                                 TI, ToDuplicate);
   if (MSSAU) {
     // Update MemoryPhis in Exit blocks.
     MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT);
@@ -1368,17 +1558,38 @@ void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val,
   // iteration.
   WeakTrackingVH LICHandle(LIC);
 
-  // Now we rewrite the original code to know that the condition is true and the
-  // new code to know that the condition is false.
-  rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
-
-  // It's possible that simplifying one loop could cause the other to be
-  // changed to another value or a constant.  If its a constant, don't simplify
-  // it.
-  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
-      LICHandle && !isa<Constant>(LICHandle))
-    rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
-                                         /*IsEqual=*/true);
+  if (ToDuplicate.empty()) {
+    // Now we rewrite the original code to know that the condition is true and
+    // the new code to know that the condition is false.
+    rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false);
+
+    // It's possible that simplifying one loop could cause the other to be
+    // changed to another value or a constant.  If its a constant, don't
+    // simplify it.
+    if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
+        LICHandle && !isa<Constant>(LICHandle))
+      rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val,
+                                           /*IsEqual=*/true);
+  } else {
+    // Partial unswitching. Update the condition in the right loop with the
+    // constant.
+    auto *CC = cast<ConstantInt>(Val);
+    if (CC->isOneValue()) {
+      rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val,
+                                           /*IsEqual=*/true);
+    } else
+      rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true);
+
+    // Mark the new loop as partially unswitched, to avoid unswitching on the
+    // same condition again.
+    auto &Context = NewLoop->getHeader()->getContext();
+    MDNode *DisableUnswitchMD = MDNode::get(
+        Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
+    MDNode *NewLoopID = makePostTransformationMetadata(
+        Context, L->getLoopID(), {"llvm.loop.unswitch.partial"},
+        {DisableUnswitchMD});
+    NewLoop->setLoopID(NewLoopID);
+  }
 
   if (MSSA && VerifyMemorySSA)
     MSSA->verifyMemorySSA();

diff  --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll
new file mode 100644
index 000000000000..1bd375cf4d2d
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch-mssa-threshold.ll
@@ -0,0 +1,48 @@
+; RUN: opt -loop-unswitch -loop-unswitch-memoryssa-threshold=0 -memssa-check-limit=1 -enable-new-pm=0 -S %s | FileCheck --check-prefix=THRESHOLD-0 %s
+; RUN: opt -loop-unswitch -memssa-check-limit=1 -S -enable-new-pm=0 %s | FileCheck --check-prefix=THRESHOLD-DEFAULT %s
+
+; Make sure -loop-unswitch-memoryssa-threshold works. The test uses
+; -memssa-check-limit=1 to effectively disable any MemorySSA optimizations
+; on construction, so the test can be kept simple.
+
+declare void @clobber()
+
+; Partial unswitching is possible, because the store in %noclobber does not
+; alias the load of the condition.
+define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) {
+; THRESHOLD-0-LABEL: @partial_unswitch_true_successor
+; THRESHOLD-0: entry:
+; THRESHOLD-0: br label %loop.header
+;
+; THRESHOLD-DEFAULT-LABEL: @partial_unswitch_true_successor
+; THRESHOLD-DEFAULT-NEXT:  entry:
+; THRESHOLD-DEFAULT-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr.1, align 4
+; THRESHOLD-DEFAULT-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; THRESHOLD-DEFAULT-NEXT:   br i1 [[C]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, i32* %ptr.1
+  %sc = icmp eq i32 %lv, 100
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  %gep.1 = getelementptr i32, i32* %ptr.2, i32 %iv
+  store i32 %lv, i32* %gep.1
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}

diff  --git a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll
index c7dcacbedd3a..63ce82230322 100644
--- a/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/LoopUnswitch/partial-unswitch.ll
@@ -5,7 +5,55 @@ declare void @clobber()
 define i32 @partial_unswitch_true_successor(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_true_successor
 ; CHECK-LABEL: entry:
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]]
+
+; CHECK:      [[FALSE_CRIT]]:
+; CHECK-NEXT:   br label %[[FALSE_PH:[a-z.]+]]
+
+; CHECK:      [[SPLIT_TRUE_PH]]:
+; CHECK-NEXT:   br label %[[TRUE_HEADER:[a-z.]+]]
+
+; CHECK: [[TRUE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100
+; CHECK-NEXT:    br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_NOCLOBBER]]:
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[TRUE_HEADER]]
+
+
+; CHECK:      [[FALSE_PH]]:
+; CHECK-NEXT:   br label %[[FALSE_HEADER:[a-z.]+]]
+
+; CHECK: [[FALSE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100
+; CHECK-NEXT:     br i1 [[FALSE_C]], label  %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_NOCLOBBER]]:
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[FALSE_HEADER]]
 ;
 entry:
   br label %loop.header
@@ -35,7 +83,55 @@ exit:
 define i32 @partial_unswitch_false_successor(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_false_successor
 ; CHECK-LABEL: entry:
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]]
+
+; CHECK:      [[FALSE_CRIT]]:
+; CHECK-NEXT:   br label %[[FALSE_PH:[a-z.]+]]
+
+; CHECK:      [[SPLIT_TRUE_PH]]:
+; CHECK-NEXT:   br label %[[TRUE_HEADER:[a-z.]+]]
+
+; CHECK: [[TRUE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100
+; CHECK-NEXT:    br i1 [[TRUE_C]], label %[[TRUE_CLOBBER:.+]], label %[[TRUE_NOCLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_NOCLOBBER]]:
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[TRUE_HEADER]]
+
+
+; CHECK:      [[FALSE_PH]]:
+; CHECK-NEXT:   br label %[[FALSE_HEADER:[a-z.]+]]
+
+; CHECK: [[FALSE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100
+; CHECK-NEXT:     br i1 false, label  %[[FALSE_CLOBBER:.+]], label %[[FALSE_NOCLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_NOCLOBBER]]:
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[FALSE_HEADER]]
 ;
 entry:
   br label %loop.header
@@ -65,7 +161,61 @@ exit:
 define i32 @partial_unswtich_gep_load_icmp(i32** %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswtich_gep_load_icmp
 ; CHECK-LABEL: entry:
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   [[GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1
+; CHECK-NEXT:   [[LV0:%[a-z.0-9]+]] = load i32*, i32** [[GEP]]
+; CHECK-NEXT:   [[LV1:%[a-z.0-9]+]] = load i32, i32* [[LV0]]
+; CHECK-NEXT:   [[C:%[a-z.0-9]+]] = icmp eq i32 [[LV1]], 100
+; CHECK-NEXT:   br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]]
+
+; CHECK:      [[FALSE_CRIT]]:
+; CHECK-NEXT:   br label %[[FALSE_PH:[a-z.]+]]
+
+; CHECK:      [[SPLIT_TRUE_PH]]:
+; CHECK-NEXT:   br label %[[TRUE_HEADER:[a-z.]+]]
+
+; CHECK: [[TRUE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:   [[TRUE_GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1
+; CHECK-NEXT:   [[TRUE_LV0:%[a-z.0-9]+]] = load i32*, i32** [[TRUE_GEP]]
+; CHECK-NEXT:   [[TRUE_LV1:%[a-z.0-9]+]] = load i32, i32* [[TRUE_LV0]]
+; CHECK-NEXT:   [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV1]], 100
+; CHECK-NEXT:   br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_NOCLOBBER]]:
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[TRUE_HEADER]]
+
+; CHECK:      [[FALSE_PH]]:
+; CHECK-NEXT:   br label %[[FALSE_HEADER:[a-z.]+]]
+
+; CHECK: [[FALSE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:   [[FALSE_GEP:%[a-z.0-9]+]] = getelementptr i32*, i32** %ptr, i32 1
+; CHECK-NEXT:   [[FALSE_LV0:%[a-z.0-9]+]] = load i32*, i32** [[FALSE_GEP]]
+; CHECK-NEXT:   [[FALSE_LV1:%[a-z.0-9]+]] = load i32, i32* [[FALSE_LV0]]
+; CHECK-NEXT:   [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV1]], 100
+; CHECK-NEXT:   br i1 [[FALSE_C]], label  %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_NOCLOBBER]]:
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+
+; CHECK: [[FALSE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[FALSE_HEADER]]
 ;
 entry:
   br label %loop.header
@@ -97,7 +247,63 @@ exit:
 define i32 @partial_unswitch_reduction_phi(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_reduction_phi
 ; CHECK-LABEL: entry:
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]]
+
+; CHECK:      [[FALSE_CRIT]]:
+; CHECK-NEXT:   br label %[[FALSE_PH:[a-z.]+]]
+
+; CHECK:      [[SPLIT_TRUE_PH]]:
+; CHECK-NEXT:   br label %[[TRUE_HEADER:[a-z.]+]]
+
+; CHECK: [[TRUE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[TRUE_RED:%[a-z.0-9]+]] = phi i32 [ 20, %[[SPLIT_TRUE_PH]] ], [ [[TRUE_RED_NEXT:%[a-z.0-9]+]], %[[TRUE_LATCH:[a-z.0-9]+]]
+; CHECK-NEXT:    [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100
+; CHECK-NEXT:    br i1 [[TRUE_C]], label %[[TRUE_CLOBBER:.+]], label %[[TRUE_NOCLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_NOCLOBBER]]:
+; CHECK-NEXT:  [[TRUE_ADD10:%.+]] = add i32 [[TRUE_RED]], 10
+; CHECK-NEXT:  br label %[[TRUE_LATCH]]
+
+; CHECK: [[TRUE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  [[TRUE_ADD5:%.+]] = add i32 [[TRUE_RED]], 5
+; CHECK-NEXT:  br label %[[TRUE_LATCH]]
+
+; CHECK: [[TRUE_LATCH]]:
+; CHECK-NEXT:   [[TRUE_RED_NEXT]] = phi i32 [ [[TRUE_ADD5]], %[[TRUE_CLOBBER]] ], [ [[TRUE_ADD10]], %[[TRUE_NOCLOBBER]] ]
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[TRUE_HEADER]]
+
+
+; CHECK:      [[FALSE_PH]]:
+; CHECK-NEXT:   br label %[[FALSE_HEADER:[a-z.]+]]
+
+; CHECK: [[FALSE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[FALSE_RED:%[a-z.0-9]+]] = phi i32 [ 20, %[[FALSE_PH]] ], [ [[FALSE_RED_NEXT:%[a-z.0-9]+]], %[[FALSE_LATCH:[a-z.0-9]+]]
+; CHECK-NEXT:    [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100
+; CHECK-NEXT:     br i1 false, label  %[[FALSE_CLOBBER:.+]], label %[[FALSE_NOCLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  [[FALSE_ADD5:%.+]] = add i32 [[FALSE_RED]], 5
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_NOCLOBBER]]:
+; CHECK-NEXT:  [[FALSE_ADD10:%.+]] = add i32 [[FALSE_RED]], 10
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_LATCH]]:
+; CHECK-NEXT:   [[FALSE_RED_NEXT]] = phi i32 [ [[FALSE_ADD5]], %[[FALSE_CLOBBER]] ], [ [[FALSE_ADD10]], %[[FALSE_NOCLOBBER]] ]
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[FALSE_HEADER]]
 ;
 entry:
   br label %loop.header
@@ -134,7 +340,59 @@ exit:
 define i32 @partial_unswitch_true_successor_noclobber(i32* noalias %ptr.1, i32* noalias %ptr.2, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_true_successor
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr.1, align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]], label %[[SPLIT_TRUE_PH:[a-z._]+]], label %[[FALSE_CRIT:[a-z._]+]]
+
+; CHECK:      [[FALSE_CRIT]]:
+; CHECK-NEXT:   br label %[[FALSE_PH:[a-z.]+]]
+
+; CHECK:      [[SPLIT_TRUE_PH]]:
+; CHECK-NEXT:   br label %[[TRUE_HEADER:[a-z.]+]]
+
+; CHECK: [[TRUE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[TRUE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr.1, align 4
+; CHECK-NEXT:    [[TRUE_C:%[a-z.0-9]+]] = icmp eq i32 [[TRUE_LV]], 100
+; CHECK-NEXT:    br i1 true, label %[[TRUE_NOCLOBBER:.+]], label %[[TRUE_CLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_NOCLOBBER]]:
+; CHECK-NEXT:  [[TRUE_GEP:%[a-z0-9._]+]]  = getelementptr i32, i32* %ptr.2
+; CHECK-NEXT:  store i32 [[TRUE_LV]], i32* [[TRUE_GEP]], align 4
+; CHECK-NEXT:  br label %[[TRUE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[TRUE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[TRUE_HEADER]]
+
+
+; CHECK:      [[FALSE_PH]]:
+; CHECK-NEXT:   br label %[[FALSE_HEADER:[a-z.]+]]
+
+; CHECK: [[FALSE_HEADER]]:
+; CHECK-NEXT:   phi i32
+; CHECK-NEXT:    [[FALSE_LV:%[a-z.0-9]+]] = load i32, i32* %ptr.1, align 4
+; CHECK-NEXT:    [[FALSE_C:%[a-z.0-9]+]] = icmp eq i32 [[FALSE_LV]], 100
+; CHECK-NEXT:     br i1 [[FALSE_C]], label  %[[FALSE_NOCLOBBER:.+]], label %[[FALSE_CLOBBER:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_NOCLOBBER]]:
+; CHECK-NEXT:  [[FALSE_GEP:%[a-z0-9._]+]]  = getelementptr i32, i32* %ptr.2
+; CHECK-NEXT:  store i32 [[FALSE_LV]], i32* [[FALSE_GEP]], align 4
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_CLOBBER]]:
+; CHECK-NEXT:  call
+; CHECK-NEXT:  br label %[[FALSE_LATCH:[a-z0-9._]+]]
+
+; CHECK: [[FALSE_LATCH]]:
+; CHECK-NEXT:   icmp
+; CHECK-NEXT:   add
+; CHECK-NEXT:   br {{.*}} label %[[FALSE_HEADER]]
 ;
 entry:
   br label %loop.header
@@ -321,9 +579,10 @@ exit:
 ; duplicated load being a MemoryPHI outside the loop.
 define void @partial_unswitch_memssa_update(i32* noalias %ptr, i1 %c) {
 ; CHECK-LABEL: @partial_unswitch_memssa_update(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 %c, label %loop.ph, label %outside.clobber
-;
+; CHECK-LABEL: loop.ph:
+; CHECK-NEXT:    [[LV:%[a-z0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:    [[C:%[a-z0-9]+]] = icmp eq i32 [[LV]], 0
+; CHECK-NEXT:    br i1 [[C]]
 entry:
   br i1 %c, label %loop.ph, label %outside.clobber
 
@@ -359,8 +618,13 @@ exit:
 define i32 @partial_unswitch_true_successor_preheader_insertion(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_true_successor_preheader_insertion(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %ec = icmp ne i32* %ptr, null
-; CHECK-NEXT:    br i1 %ec, label %loop.ph, label %exit
+; CHECK-NEXT:   [[EC:%[a-z]+]] = icmp ne i32* %ptr, null
+; CHECK-NEXT:   br i1 [[EC]], label %[[PH:[a-z0-9.]+]], label %[[EXIT:[a-z0-9.]+]]
+
+; CHECK: [[PH]]:
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]]
 ;
 entry:
   %ec = icmp ne i32* %ptr, null
@@ -397,8 +661,10 @@ exit:
 define i32 @partial_unswitch_true_successor_insert_point(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_true_successor_insert_point(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @clobber()
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   call void @clobber()
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* %ptr, align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]]
 ;
 entry:
   call void @clobber()
@@ -432,7 +698,10 @@ exit:
 define i32 @partial_unswitch_true_successor_hoist_invariant(i32* %ptr, i32 %N) {
 ; CHECK-LABEL: @partial_unswitch_true_successor_hoist_invariant(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label %loop.header
+; CHECK-NEXT:   [[GEP:%[0-9]+]] = getelementptr i32, i32* %ptr, i64 1
+; CHECK-NEXT:   [[LV:%[0-9]+]] = load i32, i32* [[GEP]], align 4
+; CHECK-NEXT:   [[C:%[0-9]+]] = icmp eq i32 [[LV]], 100
+; CHECK-NEXT:   br i1 [[C]]
 ;
 entry:
   br label %loop.header


        


More information about the llvm-branch-commits mailing list