[llvm] cd0ba9d - [LoopPeel] Peel if it turns invariant loads dereferenceable.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 12 03:42:50 PDT 2021
Author: Florian Hahn
Date: 2021-10-12T11:42:28+01:00
New Revision: cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83
URL: https://github.com/llvm/llvm-project/commit/cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83
DIFF: https://github.com/llvm/llvm-project/commit/cd0ba9dc58c5806f4e3cc9635ab1f64af6973a83.diff
LOG: [LoopPeel] Peel if it turns invariant loads dereferenceable.
This patch adds a new cost heuristic that allows peeling a single
iteration off a read-only loop if the loop contains a load that
1. feeds an exit condition,
2. dominates the latch,
3. is not already known to be dereferenceable,
4. and has a loop-invariant address.
If all non-latch exits are terminated with unreachable, such loads in
the loop are guaranteed to be dereferenceable after peeling off the
first iteration, which enables hoisting/CSE'ing them.
This enables vectorization of loops with certain runtime checks, such as
multiple calls to `std::vector::at` when the vector is passed as a pointer.
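As a hedged illustration of that claim (hypothetical function name, modelled
on the sum_2_at_with_int_conversion PhaseOrdering test below, where the
out-of-range path is a noreturn error call): when the vectors are only
reachable through pointers, each at() call re-loads the vector's internal
begin/end pointers for its range check, and those loads block vectorization
until peeling makes them dereferenceable.

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical source-level shape of the test: both vectors are accessed
// through pointers, so every at() call re-checks bounds via loads of the
// vectors' internal pointers. After peeling the first iteration those loads
// become dereferenceable, the checks become loop-invariant, and the remaining
// loop can be vectorized.
int64_t sum_2_at(const std::vector<int64_t> *a,
                 const std::vector<int64_t> *b, std::size_t n) {
  int64_t sum = 0;
  for (std::size_t i = 0; i < n; ++i)
    sum += a->at(i) + b->at(i);
  return sum;
}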
Reviewed By: mkazantsev
Differential Revision: https://reviews.llvm.org/D108114
Added:
Modified:
llvm/include/llvm/Transforms/Utils/LoopPeel.h
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/lib/Transforms/Utils/LoopPeel.cpp
llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll
llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 8f857e1e5c215..6f1b4a8804579 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -32,8 +32,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE,
- unsigned Threshold = UINT_MAX);
+ unsigned &TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, unsigned Threshold = UINT_MAX);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index d1605c5c7b602..67702520511b8 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -992,7 +992,7 @@ bool llvm::computeUnrollCount(
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index a6cdce1f4b8f0..e9f6af81066cf 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -165,6 +166,66 @@ static unsigned calculateIterationsToInvariance(
return ToInvariance;
}
+// Try to find any invariant memory reads that will become dereferenceable in
+// the remainder loop after peeling. The load must also be used (transitively)
+// by an exit condition. Returns the number of iterations to peel off (at the
+// moment either 0 or 1).
+static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
+ DominatorTree &DT) {
+ // Skip loops with a single exiting block, because there should be no benefit
+ // for the heuristic below.
+ if (L.getExitingBlock())
+ return 0;
+
+ // All non-latch exit blocks must have an UnreachableInst terminator.
+ // Otherwise the heuristic below may not be profitable.
+ SmallVector<BasicBlock *, 4> Exits;
+ L.getUniqueNonLatchExitBlocks(Exits);
+ if (any_of(Exits, [](const BasicBlock *BB) {
+ return !isa<UnreachableInst>(BB->getTerminator());
+ }))
+ return 0;
+
+ // Now look for invariant loads that dominate the latch and are not known to
+ // be dereferenceable. If there are such loads and no writes, they will become
+ // dereferenceable in the loop if the first iteration is peeled off. Also
+ // collect the set of instructions controlled by such loads. Only peel if an
+ // exit condition uses (transitively) such a load.
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Latch = L.getLoopLatch();
+ SmallPtrSet<Value *, 8> LoadUsers;
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ for (BasicBlock *BB : L.blocks()) {
+ for (Instruction &I : *BB) {
+ if (I.mayWriteToMemory())
+ return 0;
+
+ auto Iter = LoadUsers.find(&I);
+ if (Iter != LoadUsers.end()) {
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ // Do not look for reads in the header; they can already be hoisted
+ // without peeling.
+ if (BB == Header)
+ continue;
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ Value *Ptr = LI->getPointerOperand();
+ if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
+ !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ }
+ }
+ SmallVector<BasicBlock *> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ for (BasicBlock *Exiting : ExitingBlocks)
+ if (LoadUsers.find(Exiting->getTerminator()) != LoadUsers.end())
+ return 1;
+ return 0;
+}
+
// Return the number of iterations to peel off that make conditions in the
// body true/false. For example, if we peel 2 iterations off the loop below,
// the condition i < 2 can be evaluated at compile time.
@@ -280,8 +341,8 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE,
- unsigned Threshold) {
+ unsigned &TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -348,6 +409,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
DesiredPeelCount = std::max(DesiredPeelCount,
countToEliminateCompares(*L, MaxPeelCount, SE));
+ if (DesiredPeelCount == 0)
+ DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
+
if (DesiredPeelCount > 0) {
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll b/llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll
index c659550ddf92f..c7913e551f344 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll
+++ b/llvm/test/Transforms/LoopUnroll/peel-to-turn-invariant-accesses-dereferenceable.ll
@@ -6,15 +6,99 @@ declare void @foo()
define i32 @peel_readonly_to_make_loads_derefenceable(i32* %ptr, i32 %N, i32* %inv, i1 %c.1) {
; CHECK-LABEL: @peel_readonly_to_make_loads_derefenceable(
; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]]
+; CHECK: loop.header.peel.begin:
+; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]]
+; CHECK: loop.header.peel:
+; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN_PEEL:%.*]], label [[UNREACHABLE_EXIT:%.*]]
+; CHECK: then.peel:
+; CHECK-NEXT: [[I_PEEL:%.*]] = load i32, i32* [[INV:%.*]], align 4
+; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp ult i32 [[I_PEEL]], 2
+; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[LOOP_LATCH_PEEL:%.*]], label [[UNREACHABLE_EXIT]]
+; CHECK: loop.latch.peel:
+; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[LV_PEEL:%.*]] = load i32, i32* [[GEP_PEEL]], align 4
+; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i32 0, [[LV_PEEL]]
+; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1
+; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000
+; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.header.peel.next:
+; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]]
+; CHECK: loop.header.peel.next1:
+; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK: entry.peel.newph:
+; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: loop.header:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[UNREACHABLE_EXIT_LOOPEXIT:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[INV]], align 4
+; CHECK-NEXT: [[C_2:%.*]] = icmp ult i32 [[I]], 2
+; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[UNREACHABLE_EXIT_LOOPEXIT]]
+; CHECK: loop.latch:
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]]
+; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP]], align 4
+; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM]], [[LV]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000
+; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: exit.loopexit:
+; CHECK-NEXT: [[SUM_NEXT_LCSSA_PH:%.*]] = phi i32 [ [[SUM_NEXT]], [[LOOP_LATCH]] ]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT_PEEL]], [[LOOP_LATCH_PEEL]] ], [ [[SUM_NEXT_LCSSA_PH]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]]
+; CHECK: unreachable.exit.loopexit:
+; CHECK-NEXT: br label [[UNREACHABLE_EXIT]]
+; CHECK: unreachable.exit:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: unreachable
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
+ %sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch ]
+ br i1 %c.1, label %then, label %unreachable.exit
+
+then:
+ %i = load i32, i32* %inv
+ %c.2 = icmp ult i32 %i, 2
+ br i1 %c.2, label %loop.latch, label %unreachable.exit
+
+loop.latch:
+ %gep = getelementptr i32, i32* %ptr, i32 %iv
+ %lv = load i32, i32* %gep
+ %sum.next = add i32 %sum, %lv
+ %iv.next = add nuw nsw i32 %iv, 1
+ %c.3 = icmp ult i32 %iv, 1000
+ br i1 %c.3, label %loop.header, label %exit
+
+exit:
+ ret i32 %sum.next
+
+unreachable.exit:
+ call void @foo()
+ unreachable
+}
+
+define i32 @peel_readonly_to_make_loads_derefenceable_exits_lead_to_unreachable(i32* %ptr, i32 %N, i32* %inv, i1 %c.1) {
+; CHECK-LABEL: @peel_readonly_to_make_loads_derefenceable_exits_lead_to_unreachable(
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
-; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[UNREACHABLE_EXIT:%.*]]
+; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[EXIT_2:%.*]]
; CHECK: then:
; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[INV:%.*]], align 4
; CHECK-NEXT: [[C_2:%.*]] = icmp ult i32 [[I]], 2
-; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[UNREACHABLE_EXIT]]
+; CHECK-NEXT: br i1 [[C_2]], label [[THEN_2:%.*]], label [[EXIT_2]]
+; CHECK: then.2:
+; CHECK-NEXT: [[C_4:%.*]] = icmp ult i32 [[I]], 4
+; CHECK-NEXT: br i1 [[C_4]], label [[LOOP_LATCH]], label [[EXIT_3:%.*]]
; CHECK: loop.latch:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]]
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP]], align 4
@@ -25,7 +109,11 @@ define i32 @peel_readonly_to_make_loads_derefenceable(i32* %ptr, i32 %N, i32* %i
; CHECK: exit:
; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]]
-; CHECK: unreachable.exit:
+; CHECK: exit.2:
+; CHECK-NEXT: br label [[UNREACHABLE_BB:%.*]]
+; CHECK: exit.3:
+; CHECK-NEXT: br label [[UNREACHABLE_BB]]
+; CHECK: unreachable.bb:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: unreachable
;
@@ -35,12 +123,16 @@ entry:
loop.header:
%iv = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch ]
- br i1 %c.1, label %then, label %unreachable.exit
+ br i1 %c.1, label %then, label %exit.2
then:
%i = load i32, i32* %inv
%c.2 = icmp ult i32 %i, 2
- br i1 %c.2, label %loop.latch, label %unreachable.exit
+ br i1 %c.2, label %then.2, label %exit.2
+
+then.2:
+ %c.4 = icmp ult i32 %i, 4
+ br i1 %c.4, label %loop.latch, label %exit.3
loop.latch:
%gep = getelementptr i32, i32* %ptr, i32 %iv
@@ -53,7 +145,13 @@ loop.latch:
exit:
ret i32 %sum.next
-unreachable.exit:
+exit.2:
+ br label %unreachable.bb
+
+exit.3:
+ br label %unreachable.bb
+
+unreachable.bb:
call void @foo()
unreachable
}
@@ -302,18 +400,18 @@ unreachable.exit:
declare i32 @llvm.experimental.deoptimize.i32(...)
-define i32 @do_not_peel_with_deopt_exit(i32* %ptr, i32 %N, i32* %inv, i1 %c.1) {
-; CHECK-LABEL: @do_not_peel_with_deopt_exit(
+define i32 @peel_with_deopt_exit(i32* %ptr, i32 %N, i32* %inv, i1 %c.1) {
+; CHECK-LABEL: @peel_with_deopt_exit(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
-; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[UNREACHABLE_EXIT:%.*]]
+; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[DEOPT_EXIT:%.*]]
; CHECK: then:
; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[INV:%.*]], align 4
; CHECK-NEXT: [[C_2:%.*]] = icmp ult i32 [[I]], 2
-; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[UNREACHABLE_EXIT]]
+; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[DEOPT_EXIT]]
; CHECK: loop.latch:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]]
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[GEP]], align 4
@@ -324,7 +422,7 @@ define i32 @do_not_peel_with_deopt_exit(i32* %ptr, i32 %N, i32* %inv, i1 %c.1) {
; CHECK: exit:
; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT: ret i32 [[SUM_NEXT_LCSSA]]
-; CHECK: unreachable.exit:
+; CHECK: deopt.exit:
; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[THEN]] ], [ [[SUM]], [[LOOP_HEADER]] ]
; CHECK-NEXT: [[RVAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[SUM_LCSSA]]) ]
; CHECK-NEXT: ret i32 [[RVAL]]
@@ -335,12 +433,12 @@ entry:
loop.header:
%iv = phi i32 [ 1, %entry ], [ %iv.next, %loop.latch ]
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop.latch ]
- br i1 %c.1, label %then, label %unreachable.exit
+ br i1 %c.1, label %then, label %deopt.exit
then:
%i = load i32, i32* %inv
%c.2 = icmp ult i32 %i, 2
- br i1 %c.2, label %loop.latch, label %unreachable.exit
+ br i1 %c.2, label %loop.latch, label %deopt.exit
loop.latch:
%gep = getelementptr i32, i32* %ptr, i32 %iv
@@ -353,7 +451,7 @@ loop.latch:
exit:
ret i32 %sum.next
-unreachable.exit:
+deopt.exit:
%rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %sum) ]
ret i32 %rval
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
index c24affbeaad12..1db41ea96e8c3 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
@@ -9,7 +9,7 @@
define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) {
; CHECK-LABEL: @sum_2_at_with_int_conversion(
-; CHECK-NEXT: entry:
+; CHECK-NEXT: at_with_int_conversion.exit12.peel:
; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0
; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8
; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1
@@ -17,24 +17,75 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) {
; CHECK-NEXT: [[START_INT_I:%.*]] = ptrtoint i64* [[START_I]] to i64
; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint i64* [[END_I]] to i64
; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
-; CHECK-NEXT: [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 0
-; CHECK-NEXT: [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 1
+; CHECK-NEXT: [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 1
+; CHECK-NEXT: [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 0
+; CHECK-NEXT: [[START_I2_PEEL:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8
+; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8
+; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64
+; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64
+; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]]
+; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, i64* [[START_I]], align 4
+; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4
+; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]]
+; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[C_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I7_PEEL]], i64 [[SUB_I]])
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN16]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER22:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[SUM_NEXT_PEEL]], i32 0
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI18:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[WIDE_LOAD19]], [[VEC_PHI18]]
+; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD20]]
+; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD21]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP16]], [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
+; CHECK-NEXT: br label [[LOOP_PREHEADER22]]
+; CHECK: loop.preheader22:
+; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER22]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12]] ], [ [[SUM_PH]], [[LOOP_PREHEADER22]] ]
; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]]
; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]]
; CHECK: error.i:
; CHECK-NEXT: tail call void @error()
; CHECK-NEXT: unreachable
; CHECK: at_with_int_conversion.exit:
-; CHECK-NEXT: [[START_I2:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8
-; CHECK-NEXT: [[END_I4:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8
-; CHECK-NEXT: [[START_INT_I5:%.*]] = ptrtoint i64* [[START_I2]] to i64
-; CHECK-NEXT: [[END_INT_I6:%.*]] = ptrtoint i64* [[END_I4]] to i64
-; CHECK-NEXT: [[SUB_I7:%.*]] = sub i64 [[END_INT_I6]], [[START_INT_I5]]
-; CHECK-NEXT: [[INRANGE_I8:%.*]] = icmp ult i64 [[SUB_I7]], [[IV]]
+; CHECK-NEXT: [[INRANGE_I8:%.*]] = icmp ult i64 [[SUB_I7_PEEL]], [[IV]]
; CHECK-NEXT: br i1 [[INRANGE_I8]], label [[ERROR_I11:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT12]]
; CHECK: error.i11:
; CHECK-NEXT: tail call void @error()
@@ -42,15 +93,16 @@ define i64 @sum_2_at_with_int_conversion(%vec* %A, %vec* %B, i64 %N) {
; CHECK: at_with_int_conversion.exit12:
; CHECK-NEXT: [[GEP_IDX_I:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[IV]]
; CHECK-NEXT: [[LV_I:%.*]] = load i64, i64* [[GEP_IDX_I]], align 4
-; CHECK-NEXT: [[GEP_IDX_I9:%.*]] = getelementptr i64, i64* [[START_I2]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_IDX_I9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[IV]]
; CHECK-NEXT: [[LV_I10:%.*]] = load i64, i64* [[GEP_IDX_I9]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LV_I]], [[SUM]]
; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD]], [[LV_I10]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp slt i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: ret i64 [[SUM_NEXT]]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT12_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT12]] ]
+; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
br label %loop
@@ -72,7 +124,7 @@ exit:
define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) {
; CHECK-LABEL: @sum_3_at_with_int_conversion(
-; CHECK-NEXT: entry:
+; CHECK-NEXT: at_with_int_conversion.exit24.peel:
; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0
; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8
; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1
@@ -80,14 +132,86 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) {
; CHECK-NEXT: [[START_INT_I:%.*]] = ptrtoint i64* [[START_I]] to i64
; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint i64* [[END_I]] to i64
; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
-; CHECK-NEXT: [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 0
-; CHECK-NEXT: [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 1
; CHECK-NEXT: [[GEP_START_I13:%.*]] = getelementptr [[VEC]], %vec* [[C:%.*]], i64 0, i32 0
; CHECK-NEXT: [[GEP_END_I15:%.*]] = getelementptr [[VEC]], %vec* [[C]], i64 0, i32 1
+; CHECK-NEXT: [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 1
+; CHECK-NEXT: [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 0
+; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, i64* [[START_I]], align 4
+; CHECK-NEXT: [[START_I2_PEEL:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8
+; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8
+; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64
+; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64
+; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]]
+; CHECK-NEXT: [[START_I14_PEEL:%.*]] = load i64*, i64** [[GEP_START_I13]], align 8
+; CHECK-NEXT: [[END_I16_PEEL:%.*]] = load i64*, i64** [[GEP_END_I15]], align 8
+; CHECK-NEXT: [[START_INT_I17_PEEL:%.*]] = ptrtoint i64* [[START_I14_PEEL]] to i64
+; CHECK-NEXT: [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL]] to i64
+; CHECK-NEXT: [[SUB_I19_PEEL:%.*]] = sub i64 [[END_INT_I18_PEEL]], [[START_INT_I17_PEEL]]
+; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4
+; CHECK-NEXT: [[LV_I22_PEEL:%.*]] = load i64, i64* [[START_I14_PEEL]], align 4
+; CHECK-NEXT: [[ADD_2_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]]
+; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[ADD_2_PEEL]], [[LV_I22_PEEL]]
+; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0
+; CHECK-NEXT: br i1 [[COND_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I19_PEEL]], i64 [[SUB_I7_PEEL]])
+; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I]])
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT: [[UMIN29:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN28]], i64 [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN29]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER37:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[SUM_NEXT_PEEL]], i32 0
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP14]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, i64* [[TMP13]], i64 2
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64* [[TMP15]] to <2 x i64>*
+; CHECK-NEXT: [[WIDE_LOAD36:%.*]] = load <2 x i64>, <2 x i64>* [[TMP16]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD32]], [[VEC_PHI31]]
+; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD33]]
+; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD34]]
+; CHECK-NEXT: [[TMP21]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD35]]
+; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD36]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP22]], [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
+; CHECK-NEXT: br label [[LOOP_PREHEADER37]]
+; CHECK: loop.preheader37:
+; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER37]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24]] ], [ [[SUM_PH]], [[LOOP_PREHEADER37]] ]
; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]]
; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]]
; CHECK: error.i:
@@ -96,40 +220,31 @@ define i64 @sum_3_at_with_int_conversion(%vec* %A, %vec* %B, %vec* %C, i64 %N) {
; CHECK: at_with_int_conversion.exit:
; CHECK-NEXT: [[GEP_IDX_I:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[IV]]
; CHECK-NEXT: [[LV_I:%.*]] = load i64, i64* [[GEP_IDX_I]], align 4
-; CHECK-NEXT: [[START_I2:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8
-; CHECK-NEXT: [[END_I4:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8
-; CHECK-NEXT: [[START_INT_I5:%.*]] = ptrtoint i64* [[START_I2]] to i64
-; CHECK-NEXT: [[END_INT_I6:%.*]] = ptrtoint i64* [[END_I4]] to i64
-; CHECK-NEXT: [[SUB_I7:%.*]] = sub i64 [[END_INT_I6]], [[START_INT_I5]]
-; CHECK-NEXT: [[INRANGE_I8:%.*]] = icmp ult i64 [[SUB_I7]], [[IV]]
+; CHECK-NEXT: [[INRANGE_I8:%.*]] = icmp ult i64 [[SUB_I7_PEEL]], [[IV]]
; CHECK-NEXT: br i1 [[INRANGE_I8]], label [[ERROR_I11:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT12:%.*]]
; CHECK: error.i11:
; CHECK-NEXT: tail call void @error()
; CHECK-NEXT: unreachable
; CHECK: at_with_int_conversion.exit12:
-; CHECK-NEXT: [[START_I14:%.*]] = load i64*, i64** [[GEP_START_I13]], align 8
-; CHECK-NEXT: [[END_I16:%.*]] = load i64*, i64** [[GEP_END_I15]], align 8
-; CHECK-NEXT: [[START_INT_I17:%.*]] = ptrtoint i64* [[START_I14]] to i64
-; CHECK-NEXT: [[END_INT_I18:%.*]] = ptrtoint i64* [[END_I16]] to i64
-; CHECK-NEXT: [[SUB_I19:%.*]] = sub i64 [[END_INT_I18]], [[START_INT_I17]]
-; CHECK-NEXT: [[INRANGE_I20:%.*]] = icmp ult i64 [[SUB_I19]], [[IV]]
+; CHECK-NEXT: [[INRANGE_I20:%.*]] = icmp ult i64 [[SUB_I19_PEEL]], [[IV]]
; CHECK-NEXT: br i1 [[INRANGE_I20]], label [[ERROR_I23:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT24]]
; CHECK: error.i23:
; CHECK-NEXT: tail call void @error()
; CHECK-NEXT: unreachable
; CHECK: at_with_int_conversion.exit24:
-; CHECK-NEXT: [[GEP_IDX_I9:%.*]] = getelementptr i64, i64* [[START_I2]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_IDX_I9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[IV]]
; CHECK-NEXT: [[LV_I10:%.*]] = load i64, i64* [[GEP_IDX_I9]], align 4
-; CHECK-NEXT: [[GEP_IDX_I21:%.*]] = getelementptr i64, i64* [[START_I14]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_IDX_I21:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[IV]]
; CHECK-NEXT: [[LV_I22:%.*]] = load i64, i64* [[GEP_IDX_I21]], align 4
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[LV_I]], [[SUM]]
; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[LV_I10]]
; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD_2]], [[LV_I22]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], [[N:%.*]]
-; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: ret i64 [[SUM_NEXT]]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT24_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT24]] ]
+; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
br label %loop