[llvm] 3dcaf29 - [Loop Peeling] Add possibility to enable peeling on loop nests.

Arkady Shlykov via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 2 08:40:49 PST 2020


Author: Arkady Shlykov
Date: 2020-03-02T08:37:11-08:00
New Revision: 3dcaf296aeb0d06bcb8f6e7fb661573d6e01d90c

URL: https://github.com/llvm/llvm-project/commit/3dcaf296aeb0d06bcb8f6e7fb661573d6e01d90c
DIFF: https://github.com/llvm/llvm-project/commit/3dcaf296aeb0d06bcb8f6e7fb661573d6e01d90c.diff

LOG: [Loop Peeling] Add possibility to enable peeling on loop nests.

Summary:
The current peeling implementation bails out on loop nests.
This patch adds a field to TargetTransformInfo::UnrollingPreferences
that targets can set to relax this constraint when it is profitable
(disabled by default).
An option, -unroll-allow-loop-nests-peeling, is also added to enable
peeling of loop nests manually for experimentation and testing.

Reviewers: fhahn, lebedev.ri, xbolva00

Reviewed By: xbolva00

Subscribers: RKSimon, xbolva00, hiraditya, zzheng, llvm-commits

Differential Revision: https://reviews.llvm.org/D70304

Added: 
    llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
    llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
    llvm/lib/Transforms/Utils/LoopUtils.cpp
    llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 8a1e720d77f6..2968a5f37a46 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -494,6 +494,8 @@ class TargetTransformInfo {
     bool UpperBound;
     /// Allow peeling off loop iterations.
     bool AllowPeeling;
+    /// Allow peeling off loop iterations for loop nests.
+    bool AllowLoopNestsPeeling;
     /// Allow unrolling of all the iterations of the runtime loop remainder.
     bool UnrollRemainder;
     /// Allow unroll and jam. Used to enable unroll and jam for the target.

diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3de4318cc7b3..15a3be5487eb 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -31,6 +32,7 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
 
 namespace llvm {
 
@@ -426,6 +428,12 @@ void appendReversedLoopsToWorklist(RangeT &&,
 /// already reversed loops in LI.
 /// FIXME: Consider changing the order in LoopInfo.
 void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
+
+/// Recursively clone loop L and its subloops, mapping blocks through VM. The
+/// clone is added under PL, or made top-level if PL is null. LPM may be null.
+Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+                LoopInfo *LI, LPPassManager *LPM);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 15ab2abbc1a8..a1df49300b90 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -154,6 +154,10 @@ static cl::opt<bool>
                        cl::desc("Allows loops to be peeled when the dynamic "
                                 "trip count is known to be low."));
 
+static cl::opt<bool> UnrollAllowLoopNestsPeeling(
+    "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
+    cl::desc("Allows loop nests to be peeled."));
+
 static cl::opt<bool> UnrollUnrollRemainder(
   "unroll-remainder", cl::Hidden,
   cl::desc("Allow the loop remainder to be unrolled."));
@@ -215,6 +219,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
   UP.Force = false;
   UP.UpperBound = false;
   UP.AllowPeeling = true;
+  UP.AllowLoopNestsPeeling = false;
   UP.UnrollAndJam = false;
   UP.PeelProfiledIterations = true;
   UP.UnrollAndJamInnerLoopThreshold = 60;
@@ -255,6 +260,8 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
     UP.UpperBound = false;
   if (UnrollAllowPeeling.getNumOccurrences() > 0)
     UP.AllowPeeling = UnrollAllowPeeling;
+  if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
+    UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
   if (UnrollUnrollRemainder.getNumOccurrences() > 0)
     UP.UnrollRemainder = UnrollUnrollRemainder;
 

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 6889facbd050..5a8127e465e8 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -903,30 +903,6 @@ bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val,
   return true;
 }
 
-/// Recursively clone the specified loop and all of its children,
-/// mapping the blocks with the specified map.
-static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI,
-                       LPPassManager *LPM) {
-  Loop &New = *LI->AllocateLoop();
-  if (PL)
-    PL->addChildLoop(&New);
-  else
-    LI->addTopLevelLoop(&New);
-  LPM->addLoop(New);
-
-  // Add all of the blocks in L to the new loop.
-  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
-       I != E; ++I)
-    if (LI->getLoopFor(*I) == L)
-      New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
-
-  // Add all of the subloops to the new loop.
-  for (Loop *I : *L)
-    cloneLoop(I, &New, VM, LI, LPM);
-
-  return &New;
-}
-
 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
 /// otherwise branch to FalseDest. Insert the code immediately before OldBranch
 /// and remove (but not erase!) it from the function.

diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 7a168ff6f32b..afc4bbd7227d 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -289,8 +289,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   if (!canPeel(L))
     return;
 
-  // Only try to peel innermost loops.
-  if (!L->empty())
+  // Only try to peel innermost loops by default.
+  // The constraint can be relaxed by the target in TTI.getUnrollingPreferences
+  // or by the flag -unroll-allow-loop-nests-peeling.
+  if (!UP.AllowLoopNestsPeeling && !L->empty())
     return;
 
   // If the user provided a peel count, use that.
@@ -508,7 +510,10 @@ static void cloneLoopBlocks(
     BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
     NewBlocks.push_back(NewBB);
 
-    if (ParentLoop)
+    // If an original block is an immediate child of the loop L, its copy
+    // is a child of a ParentLoop after peeling. If a block is a child of
+    // a nested loop, it is handled in the cloneLoop() call below.
+    if (ParentLoop && LI->getLoopFor(*BB) == L)
       ParentLoop->addBasicBlockToLoop(NewBB, *LI);
 
     VMap[*BB] = NewBB;
@@ -525,6 +530,12 @@ static void cloneLoopBlocks(
     }
   }
 
+  // Recursively create the new Loop objects for nested loops, if any,
+  // to preserve LoopInfo.
+  for (Loop *ChildLoop : *L) {
+    cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
+  }
+
   // Hook-up the control flow for the newly inserted blocks.
   // The new header is hooked up directly to the "top", which is either
   // the original loop preheader (for the first iteration) or the previous

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 69020219d9d4..6a8770cb9811 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1499,3 +1499,27 @@ void llvm::appendLoopsToWorklist(LoopInfo &LI,
                                  SmallPriorityWorklist<Loop *, 4> &Worklist) {
   appendReversedLoopsToWorklist(LI, Worklist);
 }
+
+Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
+                      LoopInfo *LI, LPPassManager *LPM) {
+  Loop &New = *LI->AllocateLoop();
+  if (PL)
+    PL->addChildLoop(&New);
+  else
+    LI->addTopLevelLoop(&New);
+
+  if (LPM) // LPM may be null; then the clone is not added to it.
+    LPM->addLoop(New);
+
+  // Add clones (via VM) of the blocks LI maps directly to L, not to subloops.
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+       I != E; ++I)
+    if (LI->getLoopFor(*I) == L)
+      New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+
+  // Clone each subloop recursively, with New as the parent of the clone.
+  for (Loop *I : *L)
+    cloneLoop(I, &New, VM, LI, LPM);
+
+  return &New;
+}

diff  --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll
index 5c84884c66de..f0fbf3d6d49b 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll
@@ -403,76 +403,11 @@ for.end:
   ret void
 }
 
-; In this case we cannot peel the inner loop, because the condition involves
-; the outer induction variable.
-define void @test5(i32 %k) {
-; CHECK-LABEL: @test5(
-; CHECK-NEXT:  for.body.lr.ph:
-; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
-; CHECK:       outer.header:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[J]], 2
-; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    call void @f1()
-; CHECK-NEXT:    br label [[FOR_INC]]
-; CHECK:       if.else:
-; CHECK-NEXT:    call void @f2()
-; CHECK-NEXT:    br label [[FOR_INC]]
-; CHECK:       for.inc:
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
-; CHECK:       outer.inc:
-; CHECK-NEXT:    [[J_INC]] = add nsw i32 [[J]], 1
-; CHECK-NEXT:    [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
-; CHECK-NEXT:    br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]]
-; CHECK:       for.end:
-; CHECK-NEXT:    ret void
-;
-for.body.lr.ph:
-  br label %outer.header
-
-outer.header:
-  %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
-  br label %for.body
-
-for.body:
-  %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
-  %cmp1 = icmp ult i32 %j, 2
-  br i1 %cmp1, label %if.then, label %if.else
-
-if.then:
-  call void @f1()
-  br label %for.inc
-
-if.else:
-  call void @f2()
-  br label %for.inc
-
-for.inc:
-  %inc = add nsw i32 %i.05, 1
-  %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %outer.inc
-
-outer.inc:
-  %j.inc = add nsw i32 %j, 1
-  %outer.cmp = icmp slt i32 %j.inc, %k
-  br i1 %outer.cmp, label %outer.header, label %for.end
-
-
-for.end:
-  ret void
-}
-
 ; In this test, the condition involves 2 AddRecs. Without evaluating both
 ; AddRecs, we cannot prove that the condition becomes known in the loop body
 ; after peeling.
-define void @test6(i32 %k) {
-; CHECK-LABEL: @test6(
+define void @test5(i32 %k) {
+; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
@@ -521,8 +456,8 @@ for.end:
   ret void
 }
 
-define void @test7(i32 %k) {
-; CHECK-LABEL: @test7(
+define void @test6(i32 %k) {
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
 ; CHECK:       for.body.peel.begin:
@@ -615,8 +550,8 @@ for.end:
   ret void
 }
 
-define void @test8(i32 %k) {
-; CHECK-LABEL: @test8(
+define void @test7(i32 %k) {
+; CHECK-LABEL: @test7(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY_PEEL_BEGIN:%.*]]
 ; CHECK:       for.body.peel.begin:
@@ -711,8 +646,8 @@ for.end:
 
 ; Comparison with non-monotonic predicate due to possible wrapping, loop
 ; body cannot be simplified.
-define void @test9(i32 %k) {
-; CHECK-LABEL: @test9(
+define void @test8(i32 %k) {
+; CHECK-LABEL: @test8(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
@@ -751,8 +686,8 @@ for.end:
 }
 ; CHECK-NOT: llvm.loop.unroll.disable
 
-define void @test_10__peel_first_iter_via_slt_pred(i32 %len) {
-; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred(
+define void @test_9__peel_first_iter_via_slt_pred(i32 %len) {
+; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -818,8 +753,8 @@ if.end:                                           ; preds = %if.then, %for.body
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) {
-; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred(
+define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) {
+; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -887,8 +822,8 @@ if.end:                                           ; preds = %if.then, %for.body
 
 ; NOTE: here we should only peel the first iteration,
 ;       i.e. all calls to sink() must stay in loop.
-define void @test12__peel_first_iter_via_eq_pred(i32 %len) {
-; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred(
+define void @test11__peel_first_iter_via_eq_pred(i32 %len) {
+; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -956,8 +891,8 @@ if.end:                                           ; preds = %if.then, %for.body
 
 ; NOTE: here we should only peel the first iteration,
 ;       i.e. all calls to sink() must stay in loop.
-define void @test13__peel_first_iter_via_ne_pred(i32 %len) {
-; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred(
+define void @test12__peel_first_iter_via_ne_pred(i32 %len) {
+; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -1024,8 +959,8 @@ if.end:                                           ; preds = %if.then, %for.body
 }
 
 ; No peeling is profitable here.
-define void @test14__ivar_mod2_is_1(i32 %len) {
-; CHECK-LABEL: @test14__ivar_mod2_is_1(
+define void @test13__ivar_mod2_is_1(i32 %len) {
+; CHECK-LABEL: @test13__ivar_mod2_is_1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -1074,8 +1009,8 @@ if.end:                                           ; preds = %if.then, %for.body
 }
 
 ; No peeling is profitable here.
-define void @test15__ivar_mod2_is_0(i32 %len) {
-; CHECK-LABEL: @test15__ivar_mod2_is_0(
+define void @test14__ivar_mod2_is_0(i32 %len) {
+; CHECK-LABEL: @test14__ivar_mod2_is_0(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -1123,10 +1058,10 @@ if.end:                                           ; preds = %if.then, %for.body
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; Similar to @test7, we need to peel one extra iteration, and we can't do that
+; Similar to @test6, we need to peel one extra iteration, and we can't do that
 ; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
-define void @test16(i32 %k) {
-; CHECK-LABEL: @test16(
+define void @test15(i32 %k) {
+; CHECK-LABEL: @test15(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
@@ -1164,10 +1099,10 @@ for.end:
   ret void
 }
 
-; Similar to @test8, we need to peel one extra iteration, and we can't do that
+; Similar to @test7, we need to peel one extra iteration, and we can't do that
 ; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
-define void @test17(i32 %k) {
-; CHECK-LABEL: @test17(
+define void @test16(i32 %k) {
+; CHECK-LABEL: @test16(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:

diff  --git a/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll
new file mode 100644
index 000000000000..dc1d9be86073
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix PEELED
+
+declare void @f1()
+declare void @f2()
+
+; In this case we cannot peel the inner loop, because the condition involves
+; the outer induction variable.
+; Peel the loop nest if allowed by the flag -unroll-allow-loop-nests-peeling.
+define void @test1(i32 %k) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  for.body.lr.ph:
+; CHECK-NEXT:    br label [[OUTER_HEADER:%.*]]
+; CHECK:       outer.header:
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i32 [[J]], 2
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @f1()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @f2()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
+; CHECK:       outer.inc:
+; CHECK-NEXT:    [[J_INC]] = add nsw i32 [[J]], 1
+; CHECK-NEXT:    [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
+; CHECK-NEXT:    br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]], !llvm.loop !{{.*}}
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+; PEELED-LABEL: @test1(
+; PEELED-NEXT:  for.body.lr.ph:
+; PEELED-NEXT:    br label [[OUTER_HEADER_PEEL_BEGIN:%.*]]
+; PEELED:       outer.header.peel.begin:
+; PEELED-NEXT:    br label [[OUTER_HEADER_PEEL:%.*]]
+; PEELED:       outer.header.peel:
+; PEELED-NEXT:    br label [[FOR_BODY_PEEL:%.*]]
+; PEELED:       for.body.peel:
+; PEELED-NEXT:    [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ]
+; PEELED-NEXT:    [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2
+; PEELED-NEXT:    br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
+; PEELED:       if.else.peel:
+; PEELED-NEXT:    call void @f2()
+; PEELED-NEXT:    br label [[FOR_INC_PEEL]]
+; PEELED:       if.then.peel:
+; PEELED-NEXT:    call void @f1()
+; PEELED-NEXT:    br label [[FOR_INC_PEEL]]
+; PEELED:       for.inc.peel:
+; PEELED-NEXT:    [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1
+; PEELED-NEXT:    [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
+; PEELED-NEXT:    br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]]
+; PEELED:       outer.inc.peel:
+; PEELED-NEXT:    [[J_INC_PEEL:%.*]] = add nsw i32 0, 1
+; PEELED-NEXT:    [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]]
+; PEELED-NEXT:    br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
+; Verify that MD_loop metadata is dropped.
+; PEELED-NOT:   , !llvm.loop !{{[0-9]*}}
+; PEELED:       outer.header.peel.next:
+; PEELED-NEXT:    br label [[OUTER_HEADER_PEEL2:%.*]]
+; PEELED:       outer.header.peel2:
+; PEELED-NEXT:    br label [[FOR_BODY_PEEL3:%.*]]
+; PEELED:       for.body.peel3:
+; PEELED-NEXT:    [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ]
+; PEELED-NEXT:    [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2
+; PEELED-NEXT:    br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]]
+; PEELED:       if.else.peel6:
+; PEELED-NEXT:    call void @f2()
+; PEELED-NEXT:    br label [[FOR_INC_PEEL8]]
+; PEELED:       if.then.peel7:
+; PEELED-NEXT:    call void @f1()
+; PEELED-NEXT:    br label [[FOR_INC_PEEL8]]
+; PEELED:       for.inc.peel8:
+; PEELED-NEXT:    [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1
+; PEELED-NEXT:    [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]]
+; PEELED-NEXT:    br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]]
+; PEELED:       outer.inc.peel11:
+; PEELED-NEXT:    [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1
+; PEELED-NEXT:    [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]]
+; PEELED-NEXT:    br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]]
+; Verify that MD_loop metadata is dropped.
+; PEELED-NOT:   , !llvm.loop !{{[0-9]*}}
+; PEELED:       outer.header.peel.next1:
+; PEELED-NEXT:    br label [[OUTER_HEADER_PEEL_NEXT14:%.*]]
+; PEELED:       outer.header.peel.next14:
+; PEELED-NEXT:    br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
+; PEELED:       for.body.lr.ph.peel.newph:
+; PEELED-NEXT:    br label [[OUTER_HEADER:%.*]]
+; PEELED:       outer.header:
+; PEELED-NEXT:    [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
+; PEELED-NEXT:    br label [[FOR_BODY:%.*]]
+; PEELED:       for.body:
+; PEELED-NEXT:    [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; PEELED-NEXT:    br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; PEELED:       if.then:
+; PEELED-NEXT:    call void @f1()
+; PEELED-NEXT:    br label [[FOR_INC]]
+; PEELED:       if.else:
+; PEELED-NEXT:    call void @f2()
+; PEELED-NEXT:    br label [[FOR_INC]]
+; PEELED:       for.inc:
+; PEELED-NEXT:    [[INC]] = add nsw i32 [[I_05]], 1
+; PEELED-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
+; PEELED-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
+; PEELED:       outer.inc:
+; PEELED-NEXT:    [[J_INC]] = add nuw nsw i32 [[J]], 1
+; PEELED-NEXT:    [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
+; PEELED-NEXT:    br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
+; PEELED:       for.end.loopexit:
+; PEELED-NEXT:    br label [[FOR_END]]
+; PEELED:       for.end:
+; PEELED-NEXT:    ret void
+;
+for.body.lr.ph:
+  br label %outer.header
+
+outer.header:
+  %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
+  br label %for.body
+
+for.body:
+  %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
+  %cmp1 = icmp ult i32 %j, 2
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:
+  call void @f1()
+  br label %for.inc
+
+if.else:
+  call void @f2()
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %inc, %k
+  br i1 %cmp, label %for.body, label %outer.inc
+
+outer.inc:
+  %j.inc = add nsw i32 %j, 1
+  %outer.cmp = icmp slt i32 %j.inc, %k
+  br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !0
+
+for.end:
+  ret void
+}
+
+!0 = distinct !{!0}


        


More information about the llvm-commits mailing list