[llvm] [LICM][MustExec] Make must-exec logic for IV condition commutative (PR #93150)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 6 08:25:20 PDT 2024
https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/93150
>From 22e71608d3f9a13532ed711e71444bb028c28273 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 14 May 2024 13:11:27 +0900
Subject: [PATCH] [LICM][MustExec] Make must-exec logic for IV condition
commutative
MustExec has special logic to determine whether the first loop
iteration will always be executed, by simplifying the IV comparison
with the start value. Currently, this code assumes that the IV is
on the LHS of the comparison, but this is not guaranteed. Make sure
it handles the commuted variant as well.
The changed PhaseOrdering test previously performed peeling to
make the loads dereferenceable -- as a side effect, this also
reduced the exit count by one, avoiding the awkward <= MAX case.
Now we know up-front that the loads are dereferenceable and can
be simply hoisted. As such, we retain the original exit count
and now have to handle it by widening the exit count calculation to
i128. This is a regression, but at least it preserves the
vectorization, which was the original goal. I'm not sure what else
can be done about that test.
---
llvm/lib/Analysis/MustExecute.cpp | 19 +-
llvm/test/Transforms/LICM/hoist-mustexec.ll | 3 +-
...ple-unreachable-exits-for-vectorization.ll | 287 ++++++++----------
3 files changed, 148 insertions(+), 161 deletions(-)
diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
index 904d30d0544654..caed62679a683c 100644
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -135,16 +135,21 @@ static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock,
// todo: this would be a lot more powerful if we used scev, but all the
// plumbing is currently missing to pass a pointer in from the pass
// Check for cmp (phi [x, preheader] ...), y where (pred x, y is known
+ ICmpInst::Predicate Pred = Cond->getPredicate();
auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0));
auto *RHS = Cond->getOperand(1);
- if (!LHS || LHS->getParent() != CurLoop->getHeader())
- return false;
- auto DL = ExitBlock->getDataLayout();
+ if (!LHS || LHS->getParent() != CurLoop->getHeader()) {
+ Pred = Cond->getSwappedPredicate();
+ LHS = dyn_cast<PHINode>(Cond->getOperand(1));
+ RHS = Cond->getOperand(0);
+ if (!LHS || LHS->getParent() != CurLoop->getHeader())
+ return false;
+ }
+
+ auto DL = ExitBlock->getModule()->getDataLayout();
auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
- auto *SimpleValOrNull = simplifyCmpInst(Cond->getPredicate(),
- IVStart, RHS,
- {DL, /*TLI*/ nullptr,
- DT, /*AC*/ nullptr, BI});
+ auto *SimpleValOrNull = simplifyCmpInst(
+ Pred, IVStart, RHS, {DL, /*TLI*/ nullptr, DT, /*AC*/ nullptr, BI});
auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull);
if (!SimpleCst)
return false;
diff --git a/llvm/test/Transforms/LICM/hoist-mustexec.ll b/llvm/test/Transforms/LICM/hoist-mustexec.ll
index 81e0815053ffe5..a6f5a2be05ee41 100644
--- a/llvm/test/Transforms/LICM/hoist-mustexec.ll
+++ b/llvm/test/Transforms/LICM/hoist-mustexec.ll
@@ -218,7 +218,6 @@ fail:
}
; Same as previous case, with commuted icmp.
-; FIXME: The load should get hoisted here as well.
define i32 @test3_commuted(ptr noalias nocapture readonly %a) nounwind uwtable {
; CHECK-LABEL: define i32 @test3_commuted(
; CHECK-SAME: ptr noalias nocapture readonly [[A:%.*]]) #[[ATTR1]] {
@@ -227,6 +226,7 @@ define i32 @test3_commuted(ptr noalias nocapture readonly %a) nounwind uwtable {
; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[IS_ZERO]], label [[FAIL:%.*]], label [[PREHEADER:%.*]]
; CHECK: preheader:
+; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[INC:%.*]], [[CONTINUE:%.*]] ]
@@ -234,7 +234,6 @@ define i32 @test3_commuted(ptr noalias nocapture readonly %a) nounwind uwtable {
; CHECK-NEXT: [[R_CHK:%.*]] = icmp uge i32 [[LEN]], [[IV]]
; CHECK-NEXT: br i1 [[R_CHK]], label [[CONTINUE]], label [[FAIL_LOOPEXIT:%.*]]
; CHECK: continue:
-; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT: [[ADD]] = add nsw i32 [[I1]], [[ACC]]
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
index 8fc5189e8bc79e..cc4890e27f2bda 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
@@ -9,95 +9,87 @@
define i64 @sum_2_at_with_int_conversion(ptr %A, ptr %B, i64 %N) {
; CHECK-LABEL: @sum_2_at_with_int_conversion(
-; CHECK-NEXT: at_with_int_conversion.exit11.peel:
+; CHECK-NEXT: entry:
; CHECK-NEXT: [[START_I:%.*]] = load ptr, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[END_I:%.*]] = load ptr, ptr [[GEP_END_I]], align 8
; CHECK-NEXT: [[START_INT_I:%.*]] = ptrtoint ptr [[START_I]] to i64
; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint ptr [[END_I]] to i64
; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
+; CHECK-NEXT: [[START_I1:%.*]] = load ptr, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[GEP_END_I2:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[END_I3:%.*]] = load ptr, ptr [[GEP_END_I2]], align 8
+; CHECK-NEXT: [[START_INT_I4:%.*]] = ptrtoint ptr [[START_I1]] to i64
+; CHECK-NEXT: [[END_INT_I5:%.*]] = ptrtoint ptr [[END_I3]] to i64
+; CHECK-NEXT: [[SUB_I6:%.*]] = sub i64 [[END_INT_I5]], [[START_INT_I4]]
+; CHECK-NEXT: [[TMP0:%.*]] = zext i64 [[SUB_I]] to i128
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i128 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[SUB_I6]] to i128
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i128 [[TMP2]], 1
+; CHECK-NEXT: [[UMIN:%.*]] = tail call i128 @llvm.umin.i128(i128 [[TMP3]], i128 [[TMP1]])
; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 0)
-; CHECK-NEXT: [[GEP_END_I2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 8
-; CHECK-NEXT: [[START_I1_PEEL:%.*]] = load ptr, ptr [[B]], align 8
-; CHECK-NEXT: [[END_I3_PEEL:%.*]] = load ptr, ptr [[GEP_END_I2]], align 8
-; CHECK-NEXT: [[START_INT_I4_PEEL:%.*]] = ptrtoint ptr [[START_I1_PEEL]] to i64
-; CHECK-NEXT: [[END_INT_I5_PEEL:%.*]] = ptrtoint ptr [[END_I3_PEEL]] to i64
-; CHECK-NEXT: [[SUB_I6_PEEL:%.*]] = sub i64 [[END_INT_I5_PEEL]], [[START_INT_I4_PEEL]]
-; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, ptr [[START_I]], align 8
-; CHECK-NEXT: [[LV_I9_PEEL:%.*]] = load i64, ptr [[START_I1_PEEL]], align 8
-; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I9_PEEL]]
-; CHECK-NEXT: [[EXITCOND_PEEL_NOT:%.*]] = icmp slt i64 [[N]], 1
-; CHECK-NEXT: br i1 [[EXITCOND_PEEL_NOT]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i64 [[SMAX]] to i128
+; CHECK-NEXT: [[UMIN12:%.*]] = tail call i128 @llvm.umin.i128(i128 [[UMIN]], i128 [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i128 [[TMP1]], [[UMIN12]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[ERROR_I:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i128 [[TMP3]], [[UMIN12]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[ERROR_I10:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; CHECK-NEXT: [[UMIN:%.*]] = tail call i64 @llvm.umin.i64(i64 [[SUB_I6_PEEL]], i64 [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[UMIN]]
-; CHECK-NEXT: [[UMIN15:%.*]] = tail call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[SUB_I]])
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[UMIN15]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 5
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER20:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = add nuw i64 [[SMAX]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER17:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[SUM_NEXT_PEEL]], i64 0
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP7]], -4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[WIDE_LOAD17:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[START_I1_PEEL]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI13:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16
-; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[WIDE_LOAD17]], [[VEC_PHI16]]
-; CHECK-NEXT: [[TMP12]] = add <2 x i64> [[TMP10]], [[WIDE_LOAD18]]
-; CHECK-NEXT: [[TMP13]] = add <2 x i64> [[TMP11]], [[WIDE_LOAD19]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[START_I1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <2 x i64>, ptr [[TMP10]], align 8
+; CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <2 x i64>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD14]], [[VEC_PHI13]]
+; CHECK-NEXT: [[TMP14]] = add <2 x i64> [[TMP12]], [[WIDE_LOAD15]]
+; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD16]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP13]], [[TMP12]]
-; CHECK-NEXT: [[TMP15:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
-; CHECK-NEXT: br label [[LOOP_PREHEADER20]]
-; CHECK: loop.preheader20:
-; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP15]], [[TMP14]]
+; CHECK-NEXT: [[TMP17:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP7]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[LOOP_PREHEADER17]]
+; CHECK: loop.preheader17:
+; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT11:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER20]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT11]] ], [ [[SUM_PH]], [[LOOP_PREHEADER20]] ]
-; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]]
-; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]]
-; CHECK: error.i:
-; CHECK-NEXT: tail call void @error()
-; CHECK-NEXT: unreachable
-; CHECK: at_with_int_conversion.exit:
-; CHECK-NEXT: [[INRANGE_I7:%.*]] = icmp ult i64 [[SUB_I6_PEEL]], [[IV]]
-; CHECK-NEXT: br i1 [[INRANGE_I7]], label [[ERROR_I10:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT11]]
-; CHECK: error.i10:
-; CHECK-NEXT: tail call void @error()
-; CHECK-NEXT: unreachable
-; CHECK: at_with_int_conversion.exit11:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[IV_PH]], [[LOOP_PREHEADER17]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ [[SUM_PH]], [[LOOP_PREHEADER17]] ]
; CHECK-NEXT: [[GEP_IDX_I:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[IV]]
; CHECK-NEXT: [[LV_I:%.*]] = load i64, ptr [[GEP_IDX_I]], align 8
-; CHECK-NEXT: [[GEP_IDX_I8:%.*]] = getelementptr i64, ptr [[START_I1_PEEL]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_IDX_I8:%.*]] = getelementptr i64, ptr [[START_I1]], i64 [[IV]]
; CHECK-NEXT: [[LV_I9:%.*]] = load i64, ptr [[GEP_IDX_I8]], align 8
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LV_I]], [[SUM]]
; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD]], [[LV_I9]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[SMAX]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: error.i:
+; CHECK-NEXT: tail call void @error()
+; CHECK-NEXT: unreachable
+; CHECK: error.i10:
+; CHECK-NEXT: tail call void @error()
+; CHECK-NEXT: unreachable
; CHECK: exit:
-; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT11_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT11]] ]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP17]], [[MIDDLE_BLOCK]] ], [ [[SUM_NEXT]], [[LOOP]] ]
; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
@@ -120,120 +112,111 @@ exit:
define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) {
; CHECK-LABEL: @sum_3_at_with_int_conversion(
-; CHECK-NEXT: at_with_int_conversion.exit22.peel:
+; CHECK-NEXT: entry:
; CHECK-NEXT: [[START_I:%.*]] = load ptr, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr i8, ptr [[A]], i64 8
; CHECK-NEXT: [[END_I:%.*]] = load ptr, ptr [[GEP_END_I]], align 8
; CHECK-NEXT: [[START_INT_I:%.*]] = ptrtoint ptr [[START_I]] to i64
; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint ptr [[END_I]] to i64
; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
-; CHECK-NEXT: [[GEP_END_I13:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 8
+; CHECK-NEXT: [[START_I1:%.*]] = load ptr, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[GEP_END_I2:%.*]] = getelementptr i8, ptr [[B]], i64 8
+; CHECK-NEXT: [[END_I3:%.*]] = load ptr, ptr [[GEP_END_I2]], align 8
+; CHECK-NEXT: [[START_INT_I4:%.*]] = ptrtoint ptr [[START_I1]] to i64
+; CHECK-NEXT: [[END_INT_I5:%.*]] = ptrtoint ptr [[END_I3]] to i64
+; CHECK-NEXT: [[SUB_I6:%.*]] = sub i64 [[END_INT_I5]], [[START_INT_I4]]
+; CHECK-NEXT: [[START_I12:%.*]] = load ptr, ptr [[C:%.*]], align 8
+; CHECK-NEXT: [[GEP_END_I13:%.*]] = getelementptr i8, ptr [[C]], i64 8
+; CHECK-NEXT: [[END_I14:%.*]] = load ptr, ptr [[GEP_END_I13]], align 8
+; CHECK-NEXT: [[START_INT_I15:%.*]] = ptrtoint ptr [[START_I12]] to i64
+; CHECK-NEXT: [[END_INT_I16:%.*]] = ptrtoint ptr [[END_I14]] to i64
+; CHECK-NEXT: [[SUB_I17:%.*]] = sub i64 [[END_INT_I16]], [[START_INT_I15]]
+; CHECK-NEXT: [[TMP0:%.*]] = zext i64 [[SUB_I]] to i128
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i128 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[SUB_I6]] to i128
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i128 [[TMP2]], 1
+; CHECK-NEXT: [[UMIN:%.*]] = tail call i128 @llvm.umin.i128(i128 [[TMP3]], i128 [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = zext i64 [[SUB_I17]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i128 [[TMP4]], 1
+; CHECK-NEXT: [[UMIN23:%.*]] = tail call i128 @llvm.umin.i128(i128 [[UMIN]], i128 [[TMP5]])
; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 0)
-; CHECK-NEXT: [[GEP_END_I2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 8
-; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, ptr [[START_I]], align 8
-; CHECK-NEXT: [[START_I1_PEEL:%.*]] = load ptr, ptr [[B]], align 8
-; CHECK-NEXT: [[END_I3_PEEL:%.*]] = load ptr, ptr [[GEP_END_I2]], align 8
-; CHECK-NEXT: [[START_INT_I4_PEEL:%.*]] = ptrtoint ptr [[START_I1_PEEL]] to i64
-; CHECK-NEXT: [[END_I3_PEEL_FR:%.*]] = freeze ptr [[END_I3_PEEL]]
-; CHECK-NEXT: [[END_INT_I5_PEEL:%.*]] = ptrtoint ptr [[END_I3_PEEL_FR]] to i64
-; CHECK-NEXT: [[SUB_I6_PEEL:%.*]] = sub i64 [[END_INT_I5_PEEL]], [[START_INT_I4_PEEL]]
-; CHECK-NEXT: [[START_I12_PEEL:%.*]] = load ptr, ptr [[C]], align 8
-; CHECK-NEXT: [[END_I14_PEEL:%.*]] = load ptr, ptr [[GEP_END_I13]], align 8
-; CHECK-NEXT: [[START_INT_I15_PEEL:%.*]] = ptrtoint ptr [[START_I12_PEEL]] to i64
-; CHECK-NEXT: [[END_INT_I16_PEEL:%.*]] = ptrtoint ptr [[END_I14_PEEL]] to i64
-; CHECK-NEXT: [[SUB_I17_PEEL:%.*]] = sub i64 [[END_INT_I16_PEEL]], [[START_INT_I15_PEEL]]
-; CHECK-NEXT: [[LV_I9_PEEL:%.*]] = load i64, ptr [[START_I1_PEEL]], align 8
-; CHECK-NEXT: [[LV_I20_PEEL:%.*]] = load i64, ptr [[START_I12_PEEL]], align 8
-; CHECK-NEXT: [[ADD_2_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I9_PEEL]]
-; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[ADD_2_PEEL]], [[LV_I20_PEEL]]
-; CHECK-NEXT: [[EXITCOND_PEEL_NOT:%.*]] = icmp slt i64 [[N]], 1
-; CHECK-NEXT: br i1 [[EXITCOND_PEEL_NOT]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = zext nneg i64 [[SMAX]] to i128
+; CHECK-NEXT: [[UMIN24:%.*]] = tail call i128 @llvm.umin.i128(i128 [[UMIN23]], i128 [[TMP6]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i128 [[TMP1]], [[UMIN24]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i128 [[TMP5]], [[UMIN24]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[ERROR_I:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK: entry.split:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i128 [[TMP3]], [[UMIN24]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[ERROR_I10:%.*]], label [[ENTRY_SPLIT_SPLIT:%.*]]
+; CHECK: entry.split.split:
+; CHECK-NEXT: br i1 [[TMP8]], label [[ERROR_I21:%.*]], label [[LOOP_PREHEADER:%.*]]
; CHECK: loop.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; CHECK-NEXT: [[UMIN:%.*]] = tail call i64 @llvm.umin.i64(i64 [[SUB_I17_PEEL]], i64 [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[UMIN]]
-; CHECK-NEXT: [[UMIN26:%.*]] = tail call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[SUB_I6_PEEL]])
-; CHECK-NEXT: [[UMIN27:%.*]] = tail call i64 @llvm.umin.i64(i64 [[UMIN26]], i64 [[SUB_I]])
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[UMIN27]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 5
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER34:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = add nuw i64 [[SMAX]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER31:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[SUM_NEXT_PEEL]], i64 0
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP10]], -4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI28:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[START_I1_PEEL]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16
-; CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[START_I12_PEEL]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i64 16
-; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, ptr [[TMP10]], align 8
-; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, ptr [[TMP11]], align 8
-; CHECK-NEXT: [[TMP12:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD29]], [[VEC_PHI28]]
-; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[TMP12]], [[WIDE_LOAD30]]
-; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD31]]
-; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD32]]
-; CHECK-NEXT: [[TMP17]] = add <2 x i64> [[TMP15]], [[WIDE_LOAD33]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI25:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <2 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[START_I1]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD27:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[WIDE_LOAD28:%.*]] = load <2 x i64>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[START_I12]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i64 16
+; CHECK-NEXT: [[WIDE_LOAD29:%.*]] = load <2 x i64>, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[WIDE_LOAD30:%.*]] = load <2 x i64>, ptr [[TMP16]], align 8
+; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD26]], [[VEC_PHI25]]
+; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD27]]
+; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD28]]
+; CHECK-NEXT: [[TMP21]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD29]]
+; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD30]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP17]], [[TMP16]]
-; CHECK-NEXT: [[TMP19:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
-; CHECK-NEXT: br label [[LOOP_PREHEADER34]]
-; CHECK: loop.preheader34:
-; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP22]], [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP10]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[LOOP_PREHEADER31]]
+; CHECK: loop.preheader31:
+; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT22:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER34]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT22]] ], [ [[SUM_PH]], [[LOOP_PREHEADER34]] ]
-; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]]
-; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]]
-; CHECK: error.i:
-; CHECK-NEXT: tail call void @error()
-; CHECK-NEXT: unreachable
-; CHECK: at_with_int_conversion.exit:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[IV_PH]], [[LOOP_PREHEADER31]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ [[SUM_PH]], [[LOOP_PREHEADER31]] ]
; CHECK-NEXT: [[GEP_IDX_I:%.*]] = getelementptr i64, ptr [[START_I]], i64 [[IV]]
; CHECK-NEXT: [[LV_I:%.*]] = load i64, ptr [[GEP_IDX_I]], align 8
-; CHECK-NEXT: [[INRANGE_I7:%.*]] = icmp ult i64 [[SUB_I6_PEEL]], [[IV]]
-; CHECK-NEXT: br i1 [[INRANGE_I7]], label [[ERROR_I10:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT11:%.*]]
-; CHECK: error.i10:
-; CHECK-NEXT: tail call void @error()
-; CHECK-NEXT: unreachable
-; CHECK: at_with_int_conversion.exit11:
-; CHECK-NEXT: [[INRANGE_I18:%.*]] = icmp ult i64 [[SUB_I17_PEEL]], [[IV]]
-; CHECK-NEXT: br i1 [[INRANGE_I18]], label [[ERROR_I21:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT22]]
-; CHECK: error.i21:
-; CHECK-NEXT: tail call void @error()
-; CHECK-NEXT: unreachable
-; CHECK: at_with_int_conversion.exit22:
-; CHECK-NEXT: [[GEP_IDX_I8:%.*]] = getelementptr i64, ptr [[START_I1_PEEL]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_IDX_I8:%.*]] = getelementptr i64, ptr [[START_I1]], i64 [[IV]]
; CHECK-NEXT: [[LV_I9:%.*]] = load i64, ptr [[GEP_IDX_I8]], align 8
-; CHECK-NEXT: [[GEP_IDX_I19:%.*]] = getelementptr i64, ptr [[START_I12_PEEL]], i64 [[IV]]
+; CHECK-NEXT: [[GEP_IDX_I19:%.*]] = getelementptr i64, ptr [[START_I12]], i64 [[IV]]
; CHECK-NEXT: [[LV_I20:%.*]] = load i64, ptr [[GEP_IDX_I19]], align 8
; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[LV_I]], [[SUM]]
; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[LV_I9]]
; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[ADD_2]], [[LV_I20]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[SMAX]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: error.i:
+; CHECK-NEXT: tail call void @error()
+; CHECK-NEXT: unreachable
+; CHECK: error.i10:
+; CHECK-NEXT: tail call void @error()
+; CHECK-NEXT: unreachable
+; CHECK: error.i21:
+; CHECK-NEXT: tail call void @error()
+; CHECK-NEXT: unreachable
; CHECK: exit:
-; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[AT_WITH_INT_CONVERSION_EXIT22_PEEL:%.*]] ], [ [[SUM_NEXT]], [[AT_WITH_INT_CONVERSION_EXIT22]] ]
+; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[TMP24]], [[MIDDLE_BLOCK]] ], [ [[SUM_NEXT]], [[LOOP]] ]
; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
More information about the llvm-commits
mailing list