[llvm] [LV] Pure runtime check for minimum profitable trip count. (PR #115833)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 19 01:49:53 PST 2024
https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/115833
From 2c8fb001052487ec995724bbddcd78d446ba155f Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 11 Nov 2024 23:58:18 -0800
Subject: [PATCH 1/2] [LV] Emit a pure profitability runtime check.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +++
.../AArch64/conditional-branches-cost.ll | 6 ++-
.../AArch64/divs-with-scalable-vfs.ll | 6 ++-
.../AArch64/induction-costs-sve.ll | 15 ++++--
.../AArch64/low_trip_count_predicates.ll | 3 +-
.../LoopVectorize/AArch64/pr73894.ll | 3 +-
.../AArch64/uniform-args-call-variants.ll | 52 +++++++++++--------
.../X86/divs-with-tail-folding.ll | 6 ++-
.../LoopVectorize/first-order-recurrence.ll | 6 ++-
9 files changed, 67 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1d9e4f5a19f5ce..5b160bca48f53f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2490,6 +2490,12 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// Don't execute the vector loop if (UMax - n) < (VF * UF).
CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
+ } else if (MinProfitableTripCount.isNonZero()) {
+  // Emit a pure profitability runtime check: don't execute the vectorized
+  // loop if the trip count is <= the minimum profitable trip count.
+ Value *MinProfTC =
+ Builder.CreateElementCount(CountTy, MinProfitableTripCount);
+ CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULE, Count, MinProfTC);
}
// Create new preheader for vector loop.
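For concreteness, the guard this first patch emits boils down to IR of the
following shape (a sketch; the threshold 16 and the name %tc.check are made
up here, standing in for whatever Builder.CreateElementCount materializes
from MinProfitableTripCount, and the false edge targets vector.ph or the
first runtime-check block, as the regenerated tests below show):

  %tc.check = icmp ule i64 %0, 16
  br i1 %tc.check, label %scalar.ph, label %vector.ph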
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index c6e58326158a37..96044f99e3938e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -596,7 +596,8 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; PRED-NEXT: entry:
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; PRED-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP0]], 58
+; PRED-NEXT: br i1 [[TMP5]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; PRED: vector.memcheck:
; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
; PRED-NEXT: [[TMP1:%.*]] = shl i64 [[N]], 2
@@ -1299,7 +1300,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-SAME: ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[SRC_3:%.*]], ptr noalias [[SRC_4:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] {
; PRED-NEXT: entry:
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; PRED-NEXT: [[TMP92:%.*]] = icmp ule i64 [[TMP0]], 27
+; PRED-NEXT: br i1 [[TMP92]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
; PRED-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 0b5d65fc03821d..8686d5b4a92480 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -101,7 +101,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp ule i64 [[N]], 7
+; CHECK-NEXT: br i1 [[TMP18]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
@@ -229,7 +230,8 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[MUL_1_I:%.*]] = mul i64 [[X]], [[X]]
; CHECK-NEXT: [[MUL_2_I:%.*]] = mul i64 [[MUL_1_I]], [[X]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[TMP18:%.*]] = icmp ule i64 [[TMP0]], 2
+; CHECK-NEXT: br i1 [[TMP18]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 2f756ab4b0e1ab..8ab0ab28401f2f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -140,7 +140,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
; PRED-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; PRED-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP0]], 5
+; PRED-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; PRED: vector.memcheck:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
@@ -302,7 +303,8 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
; PRED-NEXT: entry:
; PRED-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; PRED-NEXT: [[TMP26:%.*]] = icmp ule i64 [[TMP0]], 26
+; PRED-NEXT: br i1 [[TMP26]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]]
; PRED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[MUL_X]], 0
@@ -473,7 +475,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
; PRED-NEXT: entry:
; PRED-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[X]]
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; PRED-NEXT: [[TMP37:%.*]] = icmp ule i64 [[TMP0]], 24
+; PRED-NEXT: br i1 [[TMP37]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[TMP1:%.*]] = mul i32 [[X]], [[X]]
; PRED-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
@@ -671,7 +674,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
; PRED-NEXT: entry:
; PRED-NEXT: [[ADD:%.*]] = add i32 [[X]], 1
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; PRED-NEXT: [[TMP36:%.*]] = icmp ule i64 [[TMP0]], 22
+; PRED-NEXT: br i1 [[TMP36]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]]
; PRED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[ADD]], 0
@@ -848,7 +852,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; PRED-NEXT: entry:
; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; PRED-NEXT: [[TMP9:%.*]] = icmp ule i64 [[UMAX1]], 20
+; PRED-NEXT: br i1 [[TMP9]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index 1ec384b05779a8..7aad334983ae3b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -298,7 +298,8 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TC]], 1
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1028, [[TMP20]]
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 8
+; CHECK-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TC]], 1
; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
index a70eafb6078a03..220c78f1bc6a7d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
@@ -10,7 +10,8 @@ define i32 @pr70988(ptr %src, i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 %n, 15
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 1)
; CHECK-NEXT: [[UMAX:%.*]] = zext i32 [[TMP2]] to i64
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[UMAX]], 1
+; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 1
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
index d0decbff1a4625..d19b3b9d8a1d88 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
@@ -200,38 +200,48 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; INTERLEAVE-LABEL: define void @test_uniform_not_invariant
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT: entry:
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2)
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = icmp ne i64 [[N]], 0
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ugt i64 [[N]], 1
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp ult i64 [[N]], 2
+; INTERLEAVE-NEXT: br i1 [[TMP0]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
+; INTERLEAVE: vector.ph:
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i64 [[N]], -2
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
-; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ true, [[PRED_STORE_CONTINUE4]] ]
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[PRED_STORE_CONTINUE4]] ]
-; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; INTERLEAVE-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; INTERLEAVE: pred.store.if:
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
-; INTERLEAVE-NEXT: store double [[TMP3]], ptr [[TMP4]], align 8
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = call double @foo(double [[TMP3]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
+; INTERLEAVE-NEXT: store double [[TMP4]], ptr [[TMP5]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]]
; INTERLEAVE: pred.store.continue:
; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE: pred.store.if3:
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]]
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]]
-; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]]
-; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP6]]
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8
+; INTERLEAVE-NEXT: [[TMP9:%.*]] = call double @foo(double [[TMP8]], i64 [[TMP6]]) #[[ATTR5]]
+; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP6]]
+; INTERLEAVE-NEXT: store double [[TMP9]], ptr [[TMP10]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE: pred.store.continue4:
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; INTERLEAVE-NEXT: [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]]
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP10]], [[TMP0]]
+; INTERLEAVE-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]]
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP11]], [[TMP1]]
; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK_NEXT]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
+; INTERLEAVE: for.body:
+; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; INTERLEAVE-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
+; INTERLEAVE-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
+; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5]]
+; INTERLEAVE-NEXT: [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
+; INTERLEAVE-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8
+; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; INTERLEAVE: for.cond.cleanup:
; INTERLEAVE-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index c861aa8172b9b8..de7c012c37f511 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -6,7 +6,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[N]], 4
+; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
@@ -95,7 +96,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ule i64 [[N]], 6
+; CHECK-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 6c94f5bfc836a7..09d16d4b99b1a7 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -2811,7 +2811,8 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
+; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp ule i32 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
@@ -3184,7 +3185,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
+; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = icmp ule i32 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT: br i1 [[TMP19]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
From 7a6df48b981e4831d16095ab441dd8a0202c8baf Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 18 Nov 2024 05:53:50 -0800
Subject: [PATCH 2/2] Use ult since divideCeil rounds up the MinProfTC
---
.../Transforms/Vectorize/LoopVectorize.cpp | 3 +-
.../AArch64/conditional-branches-cost.ll | 4 +-
.../AArch64/divs-with-scalable-vfs.ll | 8 ++--
.../AArch64/induction-costs-sve.ll | 10 ++---
.../AArch64/low_trip_count_predicates.ll | 4 +-
.../LoopVectorize/AArch64/pr73894.ll | 2 +-
.../AArch64/uniform-args-call-variants.ll | 37 ++++++++++---------
.../X86/divs-with-tail-folding.ll | 8 ++--
.../LoopVectorize/first-order-recurrence.ll | 8 ++--
9 files changed, 43 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5b160bca48f53f..71cce9873f5ffe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2495,7 +2495,8 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// loop if the trip count is <= the minimum profitable trip count.
Value *MinProfTC =
Builder.CreateElementCount(CountTy, MinProfitableTripCount);
- CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULE, Count, MinProfTC);
+ CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, Count, MinProfTC,
+ "min.prof.check");
}
// Create new preheader for vector loop.
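The switch from ule to ult is sound because MinProfitableTripCount is
computed with divideCeil, i.e. it is already rounded up: if the break-even
point were, say, 6.4 iterations, divideCeil yields 7, and a loop that runs
exactly 7 iterations is worth vectorizing, so only trip counts strictly
below the bound should bypass to the scalar loop. A sketch with that
hypothetical bound of 7:

  %min.prof.check = icmp ult i64 %0, 7
  br i1 %min.prof.check, label %scalar.ph, label %vector.ph

Most of the regenerated checks below simply flip ule to ult while keeping
the same constant.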
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 96044f99e3938e..74c3a263d63fd2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -596,7 +596,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; PRED-NEXT: entry:
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP0]], 58
+; PRED-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP0]], 58
; PRED-NEXT: br i1 [[TMP5]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; PRED: vector.memcheck:
; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[E]], i64 4
@@ -1300,7 +1300,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-SAME: ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[SRC_3:%.*]], ptr noalias [[SRC_4:%.*]], ptr noalias [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] {
; PRED-NEXT: entry:
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: [[TMP92:%.*]] = icmp ule i64 [[TMP0]], 27
+; PRED-NEXT: [[TMP92:%.*]] = icmp ult i64 [[TMP0]], 27
; PRED-NEXT: br i1 [[TMP92]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 8686d5b4a92480..b23641b403ab98 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -101,8 +101,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = icmp ule i64 [[N]], 7
-; CHECK-NEXT: br i1 [[TMP18]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i64 [[N]], 7
+; CHECK-NEXT: br i1 [[MIN_PROF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
@@ -230,8 +230,8 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[MUL_1_I:%.*]] = mul i64 [[X]], [[X]]
; CHECK-NEXT: [[MUL_2_I:%.*]] = mul i64 [[MUL_1_I]], [[X]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = icmp ule i64 [[TMP0]], 2
-; CHECK-NEXT: br i1 [[TMP18]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
+; CHECK-NEXT: br i1 [[MIN_PROF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 8ab0ab28401f2f..f73a8096b3dd20 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -140,7 +140,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
; PRED-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP0]], 5
+; PRED-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP0]], 5
; PRED-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; PRED: vector.memcheck:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
@@ -303,7 +303,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
; PRED-NEXT: entry:
; PRED-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: [[TMP26:%.*]] = icmp ule i64 [[TMP0]], 26
+; PRED-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP0]], 26
; PRED-NEXT: br i1 [[TMP26]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]]
@@ -475,7 +475,7 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
; PRED-NEXT: entry:
; PRED-NEXT: [[MUL:%.*]] = mul i32 [[X]], [[X]]
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: [[TMP37:%.*]] = icmp ule i64 [[TMP0]], 24
+; PRED-NEXT: [[TMP37:%.*]] = icmp ult i64 [[TMP0]], 24
; PRED-NEXT: br i1 [[TMP37]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[TMP1:%.*]] = mul i32 [[X]], [[X]]
@@ -674,7 +674,7 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
; PRED-NEXT: entry:
; PRED-NEXT: [[ADD:%.*]] = add i32 [[X]], 1
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
-; PRED-NEXT: [[TMP36:%.*]] = icmp ule i64 [[TMP0]], 22
+; PRED-NEXT: [[TMP36:%.*]] = icmp ult i64 [[TMP0]], 22
; PRED-NEXT: br i1 [[TMP36]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]]
@@ -852,7 +852,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; PRED-NEXT: entry:
; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
-; PRED-NEXT: [[TMP9:%.*]] = icmp ule i64 [[UMAX1]], 20
+; PRED-NEXT: [[TMP9:%.*]] = icmp ult i64 [[UMAX1]], 20
; PRED-NEXT: br i1 [[TMP9]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; PRED: vector.scevcheck:
; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
index 7aad334983ae3b..c541c47ba4c070 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll
@@ -298,8 +298,8 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TC]], 1
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1028, [[TMP20]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 8
-; CHECK-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8
+; CHECK-NEXT: br i1 [[MIN_PROF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TC]], 1
; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
index 220c78f1bc6a7d..f5eae5992d9bc4 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll
@@ -10,7 +10,7 @@ define i32 @pr70988(ptr %src, i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = and i32 %n, 15
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 1)
; CHECK-NEXT: [[UMAX:%.*]] = zext i32 [[TMP2]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[UMAX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 [[UMAX]], 1
; CHECK-NEXT: br i1 [[TMP7]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
index d19b3b9d8a1d88..f0f92adb4b681e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll
@@ -200,37 +200,38 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; INTERLEAVE-LABEL: define void @test_uniform_not_invariant
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT: entry:
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp ult i64 [[N]], 2
-; INTERLEAVE-NEXT: br i1 [[TMP0]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
+; INTERLEAVE-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp eq i64 [[N]], 0
+; INTERLEAVE-NEXT: br i1 [[MIN_PROF_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i64 [[N]], -2
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2)
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ne i64 [[N]], 1
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[PRED_STORE_CONTINUE4]] ]
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[PRED_STORE_CONTINUE4]] ]
; INTERLEAVE-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; INTERLEAVE: pred.store.if:
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = call double @foo(double [[TMP3]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
-; INTERLEAVE-NEXT: store double [[TMP4]], ptr [[TMP5]], align 8
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
+; INTERLEAVE-NEXT: store double [[TMP3]], ptr [[TMP4]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]]
; INTERLEAVE: pred.store.continue:
; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE: pred.store.if3:
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP6]]
-; INTERLEAVE-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8
-; INTERLEAVE-NEXT: [[TMP9:%.*]] = call double @foo(double [[TMP8]], i64 [[TMP6]]) #[[ATTR5]]
-; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP6]]
-; INTERLEAVE-NEXT: store double [[TMP9]], ptr [[TMP10]], align 8
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]]
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]]
+; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]]
+; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE: pred.store.continue4:
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; INTERLEAVE-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP1]]
-; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP11]], [[TMP1]]
+; INTERLEAVE-NEXT: [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = icmp ult i64 [[INDEX]], [[TMP0]]
+; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = icmp ult i64 [[TMP10]], [[TMP0]]
; INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK_NEXT]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
; INTERLEAVE: for.body:
; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index de7c012c37f511..267c6d9460a22d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -6,8 +6,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[N]], 4
-; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT: br i1 [[MIN_PROF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
@@ -96,8 +96,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
-; CHECK-NEXT: [[TMP14:%.*]] = icmp ule i64 [[N]], 6
-; CHECK-NEXT: br i1 [[TMP14]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i64 [[N]], 6
+; CHECK-NEXT: br i1 [[MIN_PROF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 09d16d4b99b1a7..57b14194dde9b8 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -2811,8 +2811,8 @@ define i32 @sink_into_replication_region(i32 %y) {
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp ule i32 [[TMP1]], 1
-; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
+; UNROLL-NO-VF-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i32 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT: br i1 [[MIN_PROF_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26:![0-9]+]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
@@ -3185,8 +3185,8 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
; UNROLL-NO-VF-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
-; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = icmp ule i32 [[TMP1]], 1
-; UNROLL-NO-VF-NEXT: br i1 [[TMP19]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
+; UNROLL-NO-VF-NEXT: [[MIN_PROF_CHECK:%.*]] = icmp ult i32 [[TMP1]], 1
+; UNROLL-NO-VF-NEXT: br i1 [[MIN_PROF_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF26]]
; UNROLL-NO-VF: vector.ph:
; UNROLL-NO-VF-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2