[llvm] [VPlan] Introduce chainUsesScalarValues (PR #158377)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 22 02:52:52 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/158377
>From 5de7b415bcd66bcdf8303e9d8238dcfe8c6fe392 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 12 Sep 2025 21:34:11 +0100
Subject: [PATCH] [VPlan] Introduce chainUsesScalarValues
Introduce chainUsesScalarValues to dig through a recipe-chain, skipping
widening decisions, and determine if the final leaves use only scalar
values of the given root. Demonstrate its utility in
narrowToSingleScalarRecipes, showing that it is essentially a drop-in
replacement for onlyScalarValuesUsed for the purposes of optimizations.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 12 ++--
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 24 +++++++
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 5 ++
.../AArch64/conditional-branches-cost.ll | 64 ++++++++-----------
.../extractvalue-no-scalarization-required.ll | 10 ++-
.../AArch64/sve-widen-extractvalue.ll | 8 +--
.../LoopVectorize/RISCV/dead-ops-cost.ll | 6 +-
.../LoopVectorize/X86/cost-model.ll | 56 ++++++++--------
...licate-recipe-with-only-first-lane-used.ll | 8 +--
.../LoopVectorize/X86/uniform_load.ll | 39 ++++-------
...first-order-recurrence-with-uniform-ops.ll | 18 ++----
...eref-pred-poison-ub-ops-feeding-pointer.ll | 18 +++---
llvm/test/Transforms/LoopVectorize/pr50686.ll | 18 ++----
.../LoopVectorize/single-scalar-cast-minbw.ll | 8 +--
14 files changed, 135 insertions(+), 159 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 84f02059743c3..5d09cbdafcb7f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1321,15 +1321,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
continue;
}
- // Skip recipes that aren't single scalars or don't have only their
- // scalar results used. In the latter case, we would introduce extra
- // broadcasts.
+ // Only consider recipes that are single scalars whose scalar value is
+ // used by the final leaves in a recipe-chain walk: if the final leaves
+ // don't use the scalar value, it could introduce extra broadcasts.
if (!vputils::isSingleScalar(RepOrWidenR) ||
- !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
- return U->usesScalars(RepOrWidenR) ||
- match(cast<VPRecipeBase>(U),
- m_ExtractLastElement(m_VPValue()));
- }))
+ !vputils::chainUsesScalarValues(RepOrWidenR))
continue;
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 917aa01f8a926..b5afe45080ebe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -29,6 +29,30 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
[Def](const VPUser *U) { return U->usesScalars(Def); });
}
+bool vputils::chainUsesScalarValues(const VPValue *Root) {
+ SmallVector<std::pair<const VPValue *, const VPUser *>> Worklist;
+ for (const VPUser *V : Root->users())
+ Worklist.emplace_back(Root, V);
+ while (!Worklist.empty()) {
+ auto [Op, U] = Worklist.pop_back_val();
+ if (isa<VPWidenRecipe, VPWidenCastRecipe, VPWidenCallRecipe,
+ VPWidenGEPRecipe, VPWidenSelectRecipe, VPWidenIntrinsicRecipe>(U)) {
+ const VPValue *Def = cast<VPSingleDefRecipe>(U);
+ for (const VPUser *V : Def->users())
+ Worklist.emplace_back(Def, V);
+ continue;
+ }
+ if (isa<VPWidenMemoryRecipe>(U) && vputils::isSingleScalar(Op))
+ continue;
+ if (auto *VPI = dyn_cast<VPInstruction>(U))
+ if (VPI->isVectorToScalar() || VPI->isSingleScalar())
+ continue;
+ if (!U->usesScalars(Op))
+ return false;
+ }
+ return true;
+}
+
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
if (auto *Expanded = Plan.getSCEVExpansion(Expr))
return Expanded;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 33dd8efaec2db..7801d9cc397e6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -28,6 +28,11 @@ bool onlyFirstPartUsed(const VPValue *Def);
/// Returns true if only scalar values of \p Def are used by all users.
bool onlyScalarValuesUsed(const VPValue *Def);
+/// Digs through a chain of recipes starting from \p Root, skipping widening
+/// decisions, and determines if the final leaves use only scalar values of \p
+/// Root.
+bool chainUsesScalarValues(const VPValue *Root);
+
/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index e4ee6776ae24c..a7a2233053807 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -428,48 +428,36 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
-; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
-; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
+; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
+; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
+; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]]
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
-; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
-; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
-; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
+; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
-; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
+; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
+; DEFAULT: [[PRED_STORE_IF28]]:
+; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]]
+; DEFAULT: [[PRED_STORE_CONTINUE29]]:
+; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
+; DEFAULT: [[PRED_STORE_IF30]]:
+; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]]
+; DEFAULT: [[PRED_STORE_CONTINUE31]]:
+; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33]]
; DEFAULT: [[PRED_STORE_IF32]]:
-; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
-; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
+; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
-; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
-; DEFAULT: [[PRED_STORE_IF34]]:
-; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
-; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
-; DEFAULT: [[PRED_STORE_CONTINUE35]]:
-; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
-; DEFAULT: [[PRED_STORE_IF36]]:
-; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
-; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
-; DEFAULT: [[PRED_STORE_CONTINUE37]]:
-; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
+; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[BROADCAST_SPLAT]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
@@ -728,10 +716,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP16:%.*]] = load float, ptr [[SRC_2]], align 4
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP16]], i64 0
+; DEFAULT-NEXT: [[TMP17:%.*]] = fmul float [[TMP16]], 0.000000e+00
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP17]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP17:%.*]] = fmul <8 x float> [[BROADCAST_SPLAT]], zeroinitializer
-; DEFAULT-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[TMP17]])
+; DEFAULT-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[BROADCAST_SPLAT]])
; DEFAULT-NEXT: [[TMP19:%.*]] = load float, ptr [[SRC_3]], align 4
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP19]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
index 5970608794b55..a0bafbdad1496 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll
@@ -16,7 +16,7 @@
; CM: vector.ph:
; CM: CLONE ir<%a> = extractvalue ir<%sv>
; CM: CLONE ir<%b> = extractvalue ir<%sv>
-; CM: WIDEN ir<%add> = add ir<%a>, ir<%b>
+; CM: CLONE ir<%add> = add ir<%a>, ir<%b>
; CM: Successor(s): vector loop
; CM: LV: Scalar loop costs: 5.
@@ -30,17 +30,15 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
; FORCED-NEXT: br label %[[VECTOR_PH:.*]]
; FORCED: [[VECTOR_PH]]:
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
-; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
+; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], [[TMP4]]
+; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
; FORCED: [[VECTOR_BODY]]:
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
-; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
+; FORCED-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 4
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
index 0c6a490ddf4ba..eceda0897b174 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll
@@ -17,17 +17,15 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
-; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EXTRACT0]], [[TMP10]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
+; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 65373e4128b7f..96d1953a54217 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -304,9 +304,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 -1)
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 9)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]]
@@ -320,7 +320,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]])
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]])
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 0078d00de28f8..5d19893251d66 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -224,7 +224,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[IND_END9:%.*]] = mul i64 [[N_VEC]], 32
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP124]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -269,7 +269,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[TMP155]] = fadd fast <4 x float> [[TMP154]], [[TMP153]]
; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4
; CHECK-NEXT: [[TMP156:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC8]]
-; CHECK-NEXT: br i1 [[TMP156]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP156]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[TMP157:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP155]])
; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC8]]
@@ -289,7 +289,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
; CHECK-NEXT: [[ADD4]] = fadd fast float [[ADD]], [[T2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 32
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[T0]]
-; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loopexit:
; CHECK-NEXT: [[ADD4_LCSSA:%.*]] = phi float [ [[ADD4]], [[FOR]] ], [ [[TMP124]], [[MIDDLE_BLOCK]] ], [ [[TMP157]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
@@ -335,7 +335,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
; CHECK-NEXT: [[UMIN7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN7]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 28
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 14
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1)
@@ -369,21 +369,19 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META5:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP13]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META8:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META9:![0-9]+]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP15]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[TMP21]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
-; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
+; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -406,7 +404,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
-; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -485,7 +483,7 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 {
; CHECK-NEXT: [[TMP27]] = or <2 x i1> [[VEC_PHI3]], [[TMP25]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i1> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[TMP29:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[BIN_RDX]])
@@ -505,7 +503,7 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 {
; CHECK-NEXT: [[ANY_OF_NEXT]] = select i1 [[CMP13_NOT_NOT]], i1 [[ANY_OF]], i1 false
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 40
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
-; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[ANY_OF_NEXT_LCSSA:%.*]] = phi i1 [ [[ANY_OF_NEXT]], [[LOOP]] ]
; CHECK-NEXT: ret i1 [[ANY_OF_NEXT_LCSSA]]
@@ -562,7 +560,7 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <2 x i64> [[TMP9]], [[BIN_RDX]]
@@ -583,7 +581,7 @@ define i64 @cost_assume(ptr %end, i64 %N) {
; CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw [9 x i8], ptr null, i64 [[IV_NEXT]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[GEP]], [[END]]
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP12]], [[LOOP]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[DOTLCSSA]]
@@ -627,7 +625,7 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 {
; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[BIN_RDX]])
@@ -649,7 +647,7 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 {
; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[DST]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 29
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -694,7 +692,7 @@ define i64 @live_in_known_1_via_scev() {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
-; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -751,7 +749,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]]
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]])
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -805,7 +803,7 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32>
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32
@@ -891,7 +889,7 @@ define i32 @g(i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP16]], [[TMP15]]
; CHECK-NEXT: [[BIN_RDX5:%.*]] = or <4 x i32> [[TMP17]], [[BIN_RDX]]
@@ -902,7 +900,7 @@ define i32 @g(i64 %n) {
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i32 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -926,7 +924,7 @@ define i32 @g(i64 %n) {
; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i32 [[INDEX9]], 4
; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND10]], splat (i32 4)
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT15]], [[N_VEC8]]
-; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP25]])
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC8]]
@@ -943,7 +941,7 @@ define i32 @g(i64 %n) {
; CHECK-NEXT: [[SELECT_I:%.*]] = select i1 [[EXITCOND]], i32 0, i32 2
; CHECK-NEXT: [[SELECT_NEXT]] = or i32 [[SELECT_I]], [[SELECT]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[SELECT_NEXT_LCSSA:%.*]] = phi i32 [ [[SELECT_NEXT]], [[LOOP]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[SELECT_NEXT_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
index 03087bb883464..98ab9b443c7fe 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll
@@ -199,10 +199,6 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2
-; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
@@ -211,7 +207,9 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) {
; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP16]]
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP17]]
; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP18]]
-; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP14]], <4 x float> splat (float 1.000000e+01), <4 x float> splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP10]], float 1.000000e+01, float 1.000000e+00
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP36]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP19]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP21]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index d4004daf8833c..8081c0e17f865 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -64,39 +64,24 @@ exit:
define void @uniform_load_can_fold_users(ptr noalias %src, ptr %dst, i64 %start, double %d) {
; CHECK-LABEL: define void @uniform_load_can_fold_users(
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[START:%.*]], double [[D:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1
-; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 0)
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]]
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[SRC]], align 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 9.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP5]], 9.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = fdiv double [[TMP7]], [[D]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP4]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP12]], i64 [[TMP10]]
-; CHECK-NEXT: store double [[TMP8]], ptr [[TMP13]], align 8
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP14]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[TMP4]], 1
+; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1
+; CHECK-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 0
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
index 198a30af814ba..372876c5faac6 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
@@ -134,21 +134,18 @@ define i16 @for_phi_removed(ptr %src) {
; UNROLL-NO-IC: [[VECTOR_BODY]]:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
-; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104
; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-IC: [[MIDDLE_BLOCK]]:
-; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]]
; UNROLL-NO-IC: [[SCALAR_PH]]:
; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]]
; UNROLL-NO-IC: [[LOOP]]:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
@@ -199,21 +196,18 @@ define i16 @for_phi_removed(ptr %src) {
; SINK-AFTER: [[VECTOR_BODY]]:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
-; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer
-; SINK-AFTER-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer
+; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108
; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SINK-AFTER: [[MIDDLE_BLOCK]]:
-; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
; SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]]
; SINK-AFTER: [[SCALAR_PH]]:
; SINK-AFTER-NEXT: br label %[[LOOP:.*]]
; SINK-AFTER: [[LOOP]]:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
index 5e88072517b37..f1757008ca20e 100644
--- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
+++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll
@@ -105,10 +105,10 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFF]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = sub i16 1, [[OFF]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], [[OFF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP0:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
@@ -116,7 +116,7 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
@@ -257,12 +257,12 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10)
-; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP3]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i16 1, [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
diff --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll
index 878fbec452220..be9110ce0093a 100644
--- a/llvm/test/Transforms/LoopVectorize/pr50686.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll
@@ -18,20 +18,16 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]]
-; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
index 9a699826696ec..70adac2103feb 100644
--- a/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll
@@ -84,12 +84,8 @@ define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i16> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[BROADCAST_SPLAT]], splat (i16 15)
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[TMP0]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = and i16 [[TMP0]], 15
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]
; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
More information about the llvm-commits
mailing list