[llvm] 7a9fd62 - [VPlan] Use VPlan operand order for VPBlendRecipes. (#139475)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 06:56:40 PDT 2025
Author: Florian Hahn
Date: 2025-05-14T14:56:35+01:00
New Revision: 7a9fd62278a2eab8160fa476c3a64e66786f99ad
URL: https://github.com/llvm/llvm-project/commit/7a9fd62278a2eab8160fa476c3a64e66786f99ad
DIFF: https://github.com/llvm/llvm-project/commit/7a9fd62278a2eab8160fa476c3a64e66786f99ad.diff
LOG: [VPlan] Use VPlan operand order for VPBlendRecipes. (#139475)
Don't use the order of incoming values of IR phis when creating
VPBlendRecipes. Instead, simply use the incoming operands and
blocks from the VPWidenPHIRecipe.
Note that this changes the order of the incoming operands/masks for some
blends.
PR: https://github.com/llvm/llvm-project/pull/139475
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
llvm/test/Transforms/LoopVectorize/if-reduction.ll
llvm/test/Transforms/LoopVectorize/no_outside_user.ll
llvm/test/Transforms/LoopVectorize/phi-cost.ll
llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
llvm/test/Transforms/LoopVectorize/reduction.ll
llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
llvm/test/Transforms/LoopVectorize/uniform-blend.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6a27a1339f381..1611c6d3a4437 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8546,19 +8546,12 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(VPWidenPHIRecipe *PhiR) {
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- // Map incoming IR BasicBlocks to incoming VPValues, for lookup below.
- // TODO: Add operands and masks in order from the VPlan predecessors.
- DenseMap<BasicBlock *, VPValue *> VPIncomingValues;
- auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
- for (const auto &[Idx, Pred] : enumerate(predecessors(Phi->getParent())))
- VPIncomingValues[Pred] = PhiR->getOperand(Idx);
-
unsigned NumIncoming = PhiR->getNumIncoming();
SmallVector<VPValue *, 2> OperandsWithMask;
for (unsigned In = 0; In < NumIncoming; In++) {
- BasicBlock *Pred = Phi->getIncomingBlock(In);
- OperandsWithMask.push_back(VPIncomingValues.lookup(Pred));
- VPValue *EdgeMask = getEdgeMask(Pred, Phi->getParent());
+ OperandsWithMask.push_back(PhiR->getIncomingValue(In));
+ const VPBasicBlock *Pred = PhiR->getIncomingBlock(In);
+ VPValue *EdgeMask = getEdgeMask(Pred, PhiR->getParent());
if (!EdgeMask) {
assert(In == 0 && "Both null and non-null edge masks found");
assert(all_equal(PhiR->operands()) &&
@@ -8567,7 +8560,8 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(VPWidenPHIRecipe *PhiR) {
}
OperandsWithMask.push_back(EdgeMask);
}
- return new VPBlendRecipe(Phi, OperandsWithMask);
+ return new VPBlendRecipe(cast<PHINode>(PhiR->getUnderlyingInstr()),
+ OperandsWithMask);
}
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index 3c8bbaa46f275..43b942458a39e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -28,7 +28,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP7]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP9]], i32 0
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
@@ -219,7 +219,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> splat (i8 1)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP3]], <16 x i8> splat (i8 1), <16 x i8> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index d9ec09ffaa934..2c0fb797d1d10 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -354,7 +354,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP8]])
; TFCOMMON-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer
; TFCOMMON-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP10]])
-; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP8]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[TMP11]]
+; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[TMP9]]
; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -397,8 +397,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i1> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[TMP19]])
; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[TMP20]])
-; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i64> [[TMP17]], <vscale x 2 x i64> [[TMP21]]
-; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP16]], <vscale x 2 x i64> [[TMP18]], <vscale x 2 x i64> [[TMP22]]
+; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP19]], <vscale x 2 x i64> [[TMP21]], <vscale x 2 x i64> [[TMP17]]
+; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP20]], <vscale x 2 x i64> [[TMP22]], <vscale x 2 x i64> [[TMP18]]
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index ebbc41f034bd1..88d9ed2ce201e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -46,8 +46,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], splat (i1 true)
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
-; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 8 x i1> [[TMP19]], i32 0
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[EXT]], i64 [[INDEX]], i64 poison
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP21]], i32 0
+; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]])
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 9cf7bc9fe07d6..3dc17e615048e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -20,9 +20,9 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX]], i32 9)
-; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <16 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <16 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP4]], <16 x i1> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP3]], <16 x i32> [[TMP2]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 492eb091175e2..1588d02eff3db 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -33,10 +33,10 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) {
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 20)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19)
; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4)
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> splat (i32 3)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> splat (i32 3), <4 x i32> [[TMP10]]
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -141,16 +141,14 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) {
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19)
-; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
-; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4)
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5)
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 6), <4 x i32> splat (i32 11)
; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> splat (i32 9)
-; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]]
-; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> splat (i32 18)
-; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> [[TMP12]]
+; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9)
+; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> [[TMP13]]
+; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI4]], <4 x i32> splat (i32 18)
; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]]
; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index ef4cde4027a9a..e6c2242dd0c4e 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -1207,11 +1207,11 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp uge <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x float> [[TMP9]], <4 x float> [[TMP8]]
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]]
; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -1335,8 +1335,8 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl
; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP6]]
-; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
+; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
index 3256b80b20c82..ba85bb4d84f5c 100644
--- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -185,10 +185,10 @@ define i32 @test3(i32 %N) {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10)
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 2), <2 x i32> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2)
+; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll
index b857385e38535..aee80c9015463 100644
--- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll
@@ -102,10 +102,10 @@ define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 20)
+; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 19)
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], splat (i32 4)
; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 4), <2 x i32> splat (i32 5)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> splat (i32 3)
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> splat (i32 3), <2 x i32> [[TMP8]]
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 9)
; CHECK-NEXT: store <2 x i32> [[PREDPHI3]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
index 67cf46e7d0f88..bd9647188911a 100644
--- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
@@ -13,8 +13,6 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
-; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i1> [[TMP4]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT5]], splat (i32 1)
; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP6]], <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -25,10 +23,10 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10)
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> [[VEC_IND]]
-; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 9), <2 x i32> [[PREDPHI]]
-; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]]
-; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP3]], <2 x i32> [[PREDPHI6]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> splat (i32 9)
+; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]]
+; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index 05f26b8a0a273..4c7a74ed05b58 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -1354,17 +1354,17 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
-; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
-; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]]
+; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index f6a1ebf8b0fe9..0ad1581f0a4a1 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -689,17 +689,17 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
-; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
-; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]]
+; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index 09d3ca0b0bf20..757be041afbb5 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -760,17 +760,17 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP4:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
+; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
-; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
-; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
-; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]]
+; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP9]], <4 x float> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP3]], <4 x float> [[PREDPHI2]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 8d3ffb48a5b6c..d5a206ff21da0 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -105,11 +105,11 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [32 x i16], ptr @src, i16 0, i16 [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]]
-; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> [[WIDE_LOAD]], <2 x i16> splat (i16 1)
+; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0
; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2
@@ -295,11 +295,11 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp sle <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]]
-; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> splat (i16 1)
+; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[PREDPHI]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
index 40a1eb477a212..70c0483ee9f4b 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
@@ -46,7 +46,7 @@ while.end:
define void @reuse_const_btc(ptr %A) {
; CHECK-LABEL: @reuse_const_btc
; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, splat (i32 13)
-; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> splat (i32 12), <4 x i32> splat (i32 13)
+; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> splat (i32 13), <4 x i32> splat (i32 12)
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index a030750ed0b6e..70094ed649ec2 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -131,12 +131,13 @@ define void @blend_chain_iv(i1 %c) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_IND]], <4 x i64> undef
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP10]], <4 x i64> undef, <4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI]], <4 x i64> undef
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]]
More information about the llvm-commits
mailing list