[llvm] dd3ca65 - Sinking or hoisting instructions between loops before fusion

Aaron Ballman via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 27 03:56:55 PDT 2022


Author: Aaron Kogon
Date: 2022-07-27T06:55:09-04:00
New Revision: dd3ca65c378f87524384c3a036bd6ed953c1a8c3

URL: https://github.com/llvm/llvm-project/commit/dd3ca65c378f87524384c3a036bd6ed953c1a8c3
DIFF: https://github.com/llvm/llvm-project/commit/dd3ca65c378f87524384c3a036bd6ed953c1a8c3.diff

LOG: Sinking or hoisting instructions between loops before fusion

Instructions between two adjacent loops will be hoisted above the first
loop, or sunk below the second to facilitate loop fusion. Hoisting will
be attempted for an instruction that dominates the first loop.
Otherwise, sinking this instructions will be attempted.

Instructions with side effects will not be considered for sinking or
hoisting. Hoisting/sinking of any instructions between loops will only
be performed if all the instructions can be moved. As well,
sinking/hoisting is considered for each instruction in isolation,
without taking into account sinking/hoisting decisions for other
instructions in the preheader.

Differential Revision: https://reviews.llvm.org/D118076

Added: 
    llvm/test/Transforms/LoopFusion/hoist_preheader.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_inner_barrier.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
    llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll
    llvm/test/Transforms/LoopFusion/sink_preheader.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopFuse.cpp
    llvm/test/Transforms/LoopFusion/simple.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index d94b767c7b63..da752c977f0e 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -101,6 +101,8 @@ STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block with "
 STATISTIC(NotRotated, "Candidate is not rotated");
 STATISTIC(OnlySecondCandidateIsGuarded,
           "The second candidate is guarded while the first one is not");
+STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
+STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
 
 enum FusionDependenceAnalysisChoice {
   FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -183,9 +185,8 @@ struct FusionCandidate {
 
   OptimizationRemarkEmitter &ORE;
 
-  FusionCandidate(Loop *L, DominatorTree &DT,
-                  const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
-                  TTI::PeelingPreferences PP)
+  FusionCandidate(Loop *L, DominatorTree &DT, const PostDominatorTree *PDT,
+                  OptimizationRemarkEmitter &ORE, TTI::PeelingPreferences PP)
       : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
         ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
         Latch(L->getLoopLatch()), L(L), Valid(true),
@@ -549,7 +550,6 @@ struct LoopFuser {
   PostDominatorTree &PDT;
   OptimizationRemarkEmitter &ORE;
   AssumptionCache ∾
-
   const TargetTransformInfo &TTI;
 
 public:
@@ -807,7 +807,7 @@ struct LoopFuser {
         }
         // Cannot modify the predecessors inside the above loop as it will cause
         // the iterators to be nullptrs, causing memory errors.
-        for (Instruction *CurrentBranch: WorkList) {
+        for (Instruction *CurrentBranch : WorkList) {
           BasicBlock *Succ = CurrentBranch->getSuccessor(0);
           if (Succ == BB)
             Succ = CurrentBranch->getSuccessor(1);
@@ -914,16 +914,6 @@ struct LoopFuser {
             continue;
           }
 
-          if (!isSafeToMoveBefore(*FC1->Preheader,
-                                  *FC0->Preheader->getTerminator(), DT, &PDT,
-                                  &DI)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
-                                 "instructions in preheader. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonEmptyPreheader);
-            continue;
-          }
-
           if (FC0->GuardBranch) {
             assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
 
@@ -959,6 +949,31 @@ struct LoopFuser {
             continue;
           }
 
+          // If the second loop has instructions in the pre-header, attempt to
+          // hoist them up to the first loop's pre-header or sink them into the
+          // body of the second loop.
+          SmallVector<Instruction *, 4> SafeToHoist;
+          SmallVector<Instruction *, 4> SafeToSink;
+          // At this point, this is the last remaining legality check.
+          // Which means if we can make this pre-header empty, we can fuse
+          // these loops
+          if (!isEmptyPreheader(*FC1)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+                                 "preheader.\n");
+
+            // If it is not safe to hoist/sink all instructions in the
+            // pre-header, we cannot fuse these loops.
+            if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
+                                              SafeToSink)) {
+              LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
+                                   "Fusion Candidate Pre-header.\n"
+                                << "Not Fusing.\n");
+              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+                                                         NonEmptyPreheader);
+              continue;
+            }
+          }
+
           bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
           LLVM_DEBUG(dbgs()
                      << "\tFusion appears to be "
@@ -972,6 +987,9 @@ struct LoopFuser {
           // and profitable. At this point, start transforming the code and
           // perform fusion.
 
+          // Execute the hoist/sink operations on preheader instructions
+          movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
+
           LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
                             << *FC1 << "\n");
 
@@ -1022,6 +1040,74 @@ struct LoopFuser {
     return Fused;
   }
 
+  /// Collect instructions in the \p FC1 Preheader that can be hoisted
+  /// to the \p FC0 Preheader or sunk into the \p FC1 Body
+  bool collectMovablePreheaderInsts(
+      const FusionCandidate &FC0, const FusionCandidate &FC1,
+      SmallVector<Instruction *, 4> &SafeToHoist,
+      SmallVector<Instruction *, 4> &SafeToSink) const {
+    BasicBlock *FC1Preheader = FC1.Preheader;
+    for (Instruction &I : *FC1Preheader) {
+      // Can't move a branch
+      if (&I == FC1Preheader->getTerminator())
+        continue;
+      // If the instruction has side-effects, give up.
+      // TODO: The case of mayReadFromMemory we can handle but requires
+      // additional work with a dependence analysis so for now we give
+      // up on memory reads.
+      if (I.mayHaveSideEffects() || I.mayReadFromMemory()) {
+        LLVM_DEBUG(dbgs() << "Inst: " << I << " may have side-effects.\n");
+        return false;
+      }
+
+      LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
+
+      // First check if can be hoisted
+      // If the operands of this instruction dominate the FC0 Preheader
+      // target block, then it is safe to move them to the end of the FC0
+      const BasicBlock *FC0PreheaderTarget =
+          FC0.Preheader->getSingleSuccessor();
+      assert(FC0PreheaderTarget &&
+             "Expected single successor for loop preheader.");
+      bool CanHoistInst = true;
+      for (Use &Op : I.operands()) {
+        if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+          bool OpHoisted = is_contained(SafeToHoist, OpInst);
+          // Check if we have already decided to hoist this operand. In this
+          // case, it does not dominate FC0 *yet*, but will after we hoist it.
+          if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
+            CanHoistInst = false;
+            break;
+          }
+        }
+      }
+      if (CanHoistInst) {
+        SafeToHoist.push_back(&I);
+        LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
+      } else {
+        LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
+
+        for (User *U : I.users()) {
+          if (auto *UI{dyn_cast<Instruction>(U)}) {
+            // Cannot sink if user in loop
+            // If FC1 has phi users of this value, we cannot sink it into FC1.
+            if (FC1.L->contains(UI)) {
+              // Cannot hoist or sink this instruction. No hoisting/sinking
+              // should take place, loops should not fuse
+              LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
+              return false;
+            }
+          }
+        }
+        SafeToSink.push_back(&I);
+        LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
+      }
+    }
+    LLVM_DEBUG(
+        dbgs() << "All preheader instructions could be sunk or hoisted!\n");
+    return true;
+  }
+
   /// Rewrite all additive recurrences in a SCEV to use a new loop.
   class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
   public:
@@ -1235,6 +1321,47 @@ struct LoopFuser {
       return FC0.ExitBlock == FC1.getEntryBlock();
   }
 
+  bool isEmptyPreheader(const FusionCandidate &FC) const {
+    return FC.Preheader->size() == 1;
+  }
+
+  /// Hoist \p FC1 Preheader instructions to \p FC0 Preheader
+  /// and sink others into the body of \p FC1.
+  void movePreheaderInsts(const FusionCandidate &FC0,
+                          const FusionCandidate &FC1,
+                          SmallVector<Instruction *, 4> &HoistInsts,
+                          SmallVector<Instruction *, 4> &SinkInsts) const {
+
+    // All preheader instructions except the branch must be hoisted or sunk
+    assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
+           "Attempting to sink and hoist preheader instructions, but not all "
+           "the preheader instructions are accounted for.");
+
+    NumHoistedInsts += HoistInsts.size();
+    NumSunkInsts += SinkInsts.size();
+
+    LLVM_DEBUG(if (VerboseFusionDebugging) {
+      if (!HoistInsts.empty())
+        dbgs() << "Hoisting: \n";
+      for (Instruction *I : HoistInsts)
+        dbgs() << *I << "\n";
+      if (!SinkInsts.empty())
+        dbgs() << "Sinking: \n";
+      for (Instruction *I : SinkInsts)
+        dbgs() << *I << "\n";
+    });
+
+    for (Instruction *I : HoistInsts) {
+      assert(I->getParent() == FC1.Preheader);
+      I->moveBefore(FC0.Preheader->getTerminator());
+    }
+    // insert instructions in reverse order to maintain dominance relationship
+    for (Instruction *I : reverse(SinkInsts)) {
+      assert(I->getParent() == FC1.Preheader);
+      I->moveBefore(&*FC1.ExitBlock->getFirstInsertionPt());
+    }
+  }
+
   /// Determine if two fusion candidates have identical guards
   ///
   /// This method will determine if two fusion candidates have the same guards.
@@ -1838,6 +1965,7 @@ struct LoopFuseLegacy : public FunctionPass {
   bool runOnFunction(Function &F) override {
     if (skipFunction(F))
       return false;
+
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();

diff  --git a/llvm/test/Transforms/LoopFusion/hoist_preheader.ll b/llvm/test/Transforms/LoopFusion/hoist_preheader.ll
new file mode 100644
index 000000000000..a53038707442
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/hoist_preheader.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -loop-fusion < %s | FileCheck %s
+
+define void @hoist_preheader(i32 %N) {
+
+; CHECK:pre1:
+; CHECK-NEXT:  %hoistme = add i32 1, %N
+; CHECK-NEXT:  %hoistme2 = add i32 1, %hoistme
+; CHECK-NEXT:  br label %body1
+pre1:
+  br label %body1
+
+; CHECK: body1:
+; CHECK-NOT:  %hoistme
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+pre2:
+  %hoistme = add i32 1, %N
+  %hoistme2 = add i32 1, %hoistme
+  br label %body2
+
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist.ll
new file mode 100644
index 000000000000..f53fc1ac3b59
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:  br label %body1
+pre1:
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT:  %stay =
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  %barrier = add i32 1, %N
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK: %stay =
+pre2:
+  %stay = add i32 1, %barrier
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  %barrier2 = add i32 1, %stay
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_inner_barrier.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_inner_barrier.ll
new file mode 100644
index 000000000000..7e8f77c41f0b
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_inner_barrier.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: Could not hoist/sink all instructions
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:  br label %body1
+pre1:
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT: %no_hoist =
+; CHECK-NOT: %no_hoist_sink =
+; CHECK-NOT: %no_sink =
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  %barrier = add i32 1, %N
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK: %no_hoist =
+; CHECK: %no_hoist_sink =
+; CHECK: %no_sink =
+pre2:
+  %no_hoist = add i32 1, %barrier
+  %no_hoist_sink = add i32 1, %no_hoist
+  %no_sink = add i32 1, %no_hoist_sink
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %no_hoist =
+; CHECK-NOT: %no_hoist_sink =
+; CHECK-NOT: %no_sink =
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  %barrier2 = add i32 1, %no_sink
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
new file mode 100644
index 000000000000..ee77c50d6e1e
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_store.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: have side-effects.
+; CHECK: Could not hoist/sink all instructions
+
+ at A = common global [100 x i32] zeroinitializer, align 16
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:  %ptr = alloca i32, align 4
+; CHECK-NEXT:  br label %body1
+pre1:
+  %ptr = alloca i32
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT:  %stay =
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: store i32 3, i32* %ptr
+pre2:
+  store i32 3, i32* %ptr
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll b/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll
new file mode 100644
index 000000000000..b89043a5cfdc
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/no_sink_hoist_unknown_function.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: may have side-effects
+; CHECK: Could not hoist/sink all instructions
+
+declare void @unknown_func()
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT:  br label %body1
+pre1:
+  br label %body1
+
+; CHECK:body1: 
+; CHECK-NOT:  %stay =
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: call void @unknown_func()
+pre2:
+  call void @unknown_func()
+  br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll
index f3ccb0f919e3..9d6b6d29f3d5 100644
--- a/llvm/test/Transforms/LoopFusion/simple.ll
+++ b/llvm/test/Transforms/LoopFusion/simple.ll
@@ -487,8 +487,9 @@ for.second.exit:
 }
 
 ; Test that `%add` cannot be moved to basic block entry, as it uses %i, which
-; defined after basic block entry. And the two loops for.first and for.second
-; are not fused.
+; defined after basic block entry. It also cannot be moved to for.second.exit 
+; since it is used in for.second. Check also that the two loops for.first and
+; for.second are not fused.
 
 define i64 @unsafe_preheader(i32* %A, i64 %x) {
 ; CHECK-LABEL: @unsafe_preheader(
@@ -505,7 +506,7 @@ define i64 @unsafe_preheader(i32* %A, i64 %x) {
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[X:%.*]], [[I]]
 ; CHECK-NEXT:    br label [[FOR_SECOND:%.*]]
 ; CHECK:       for.second:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 0, [[FOR_FIRST_EXIT]] ], [ [[INC_J:%.*]], [[FOR_SECOND]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[ADD]], [[FOR_FIRST_EXIT]] ], [ [[INC_J:%.*]], [[FOR_SECOND]] ]
 ; CHECK-NEXT:    [[AJ:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[J]]
 ; CHECK-NEXT:    store i32 2, i32* [[AJ]], align 4
 ; CHECK-NEXT:    [[INC_J]] = add nsw i64 [[J]], 1
@@ -530,7 +531,7 @@ for.first.exit:
   br label %for.second
 
 for.second:
-  %j = phi i64 [ 0, %for.first.exit ], [ %inc.j, %for.second ]
+  %j = phi i64 [ %add, %for.first.exit ], [ %inc.j, %for.second ]
   %Aj = getelementptr inbounds i32, i32* %A, i64 %j
   store i32 2, i32* %Aj, align 4
   %inc.j = add nsw i64 %j, 1

diff  --git a/llvm/test/Transforms/LoopFusion/sink_preheader.ll b/llvm/test/Transforms/LoopFusion/sink_preheader.ll
new file mode 100644
index 000000000000..055dfa19572a
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/sink_preheader.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -loop-fusion < %s | FileCheck %s
+
+define void @sink_preheader(i32 %N) {
+; CHECK-LABEL: @sink_preheader(
+; CHECK-NEXT:  pre1:
+; CHECK-NEXT:    br label [[BODY1:%.*]]
+; CHECK:       body1:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT:    [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT:    [[BARRIER:%.*]] = add i32 1, [[N]]
+; CHECK-NEXT:    [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT:    [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT:    br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SINKME:%.*]] = add i32 1, [[BARRIER]]
+; CHECK-NEXT:    [[SINKME2:%.*]] = add i32 1, [[BARRIER]]
+; CHECK-NEXT:    [[SINKME3:%.*]] = add i32 1, [[SINKME2]]
+; CHECK-NEXT:    ret void
+;
+pre1:
+  br label %body1
+
+body1:  ; preds = %pre1, %body1
+  %i = phi i32 [%i_next, %body1], [0, %pre1]
+  %i_next = add i32 1, %i
+  %cond = icmp ne i32 %i, %N
+  %barrier = add i32 1, %N
+  br i1 %cond, label %body1, label %pre2
+
+pre2:
+  %sinkme = add i32 1, %barrier
+  %sinkme2 = add i32 1, %barrier
+  %sinkme3 = add i32 1, %sinkme2
+  br label %body2
+
+body2:  ; preds = %pre2, %body2
+  %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+  %i_next2 = add i32 1, %i2
+  %cond2 = icmp ne i32 %i2, %N
+  br i1 %cond2, label %body2, label %exit
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list