[llvm] 36fc291 - [VPlan] Implement VPBlendRecipe::computeCost.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 13:34:08 PDT 2024
Author: Florian Hahn
Date: 2024-10-08T21:33:42+01:00
New Revision: 36fc291b6ec6d4c8e3d956d6855631ab2db7b05a
URL: https://github.com/llvm/llvm-project/commit/36fc291b6ec6d4c8e3d956d6855631ab2db7b05a
DIFF: https://github.com/llvm/llvm-project/commit/36fc291b6ec6d4c8e3d956d6855631ab2db7b05a.diff
LOG: [VPlan] Implement VPBlendRecipe::computeCost.
Implement VPBlendRecipe::computeCost. VPBlendRecipe is currently is also
used if only the first lane is used.
This also requires pre-computing costs for forced scalars and
instructions considered profitable to scalarize. For those, the cost
will be computed separately in the legacy cost model. This will also be
needed when implementing VPReplicateRecipe::computeCost.
Added:
llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 001c8987667df8..370e16353ee7e8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -961,6 +961,8 @@ using InstructionVFPair = std::pair<Instruction *, ElementCount>;
/// TargetTransformInfo to query the
diff erent backends for the cost of
///
diff erent operations.
class LoopVectorizationCostModel {
+ friend class LoopVectorizationPlanner;
+
public:
LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -7263,6 +7265,32 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
auto BranchCost = CostCtx.getLegacyCost(BB->getTerminator(), VF);
Cost += BranchCost;
}
+
+ // Pre-compute costs for instructions that are forced-scalar or profitable to
+ // scalarize. Their costs will be computed separately in the legacy cost
+ // model.
+ for (Instruction *ForcedScalar : CM.ForcedScalars[VF]) {
+ if (CostCtx.skipCostComputation(ForcedScalar, VF.isVector()))
+ continue;
+ CostCtx.SkipCostComputation.insert(ForcedScalar);
+ InstructionCost ForcedCost = CostCtx.getLegacyCost(ForcedScalar, VF);
+ LLVM_DEBUG({
+ dbgs() << "Cost of " << ForcedCost << " for VF " << VF
+ << ": forced scalar " << *ForcedScalar << "\n";
+ });
+ Cost += ForcedCost;
+ }
+ for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
+ if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
+ continue;
+ CostCtx.SkipCostComputation.insert(Scalarized);
+ LLVM_DEBUG({
+ dbgs() << "Cost of " << ScalarCost << " for VF " << VF
+ << ": profitable to scalarize " << *Scalarized << "\n";
+ });
+ Cost += ScalarCost;
+ }
+
return Cost;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8c5246d613c13d..41c62545ef2a87 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2241,6 +2241,10 @@ class VPBlendRecipe : public VPSingleDefRecipe {
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;
+ /// Return the cost of this VPWidenMemoryRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 44d8601e5d3408..096a38837e22fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1896,6 +1896,22 @@ void VPBlendRecipe::execute(VPTransformState &State) {
State.set(this, Result, OnlyFirstLaneUsed);
}
+InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+ // Handle cases where only the first lane is used the same way as the legacy
+ // cost model.
+ if (vputils::onlyFirstLaneUsed(this))
+ return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
+
+ Type *ResultTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
+ Type *CmpTy = ToVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
+ return (getNumIncomingValues() - 1) *
+ Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
+ CmpInst::BAD_ICMP_PREDICATE, CostKind);
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
new file mode 100644
index 00000000000000..6572dda70d42bb
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -0,0 +1,465 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr %dst) {
+; CHECK-LABEL: define void @test_blend_feeding_replicated_store_1(
+; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP43]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP43]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 16, i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP43]], [[TMP2]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[DST]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP7]], <16 x ptr> [[BROADCAST_SPLAT]], <16 x ptr> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP9]], i32 0
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 0
+; CHECK-NEXT: store i8 0, ptr [[TMP11]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP9]], i32 1
+; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 1
+; CHECK-NEXT: store i8 0, ptr [[TMP13]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; CHECK: [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP9]], i32 2
+; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 2
+; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; CHECK: [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP9]], i32 3
+; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 3
+; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[TMP9]], i32 4
+; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK: [[PRED_STORE_IF7]]:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 4
+; CHECK-NEXT: store i8 0, ptr [[TMP19]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; CHECK: [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i1> [[TMP9]], i32 5
+; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK: [[PRED_STORE_IF9]]:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 5
+; CHECK-NEXT: store i8 0, ptr [[TMP21]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; CHECK: [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i1> [[TMP9]], i32 6
+; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK: [[PRED_STORE_IF11]]:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 6
+; CHECK-NEXT: store i8 0, ptr [[TMP23]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; CHECK: [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i1> [[TMP9]], i32 7
+; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; CHECK: [[PRED_STORE_IF13]]:
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 7
+; CHECK-NEXT: store i8 0, ptr [[TMP25]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; CHECK: [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i1> [[TMP9]], i32 8
+; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK: [[PRED_STORE_IF15]]:
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 8
+; CHECK-NEXT: store i8 0, ptr [[TMP27]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; CHECK: [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[TMP9]], i32 9
+; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK: [[PRED_STORE_IF17]]:
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 9
+; CHECK-NEXT: store i8 0, ptr [[TMP29]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; CHECK: [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i1> [[TMP9]], i32 10
+; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK: [[PRED_STORE_IF19]]:
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 10
+; CHECK-NEXT: store i8 0, ptr [[TMP31]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; CHECK: [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i1> [[TMP9]], i32 11
+; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK: [[PRED_STORE_IF21]]:
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 11
+; CHECK-NEXT: store i8 0, ptr [[TMP33]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; CHECK: [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[TMP9]], i32 12
+; CHECK-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK: [[PRED_STORE_IF23]]:
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 12
+; CHECK-NEXT: store i8 0, ptr [[TMP35]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; CHECK: [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i1> [[TMP9]], i32 13
+; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK: [[PRED_STORE_IF25]]:
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 13
+; CHECK-NEXT: store i8 0, ptr [[TMP37]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; CHECK: [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i1> [[TMP9]], i32 14
+; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK: [[PRED_STORE_IF27]]:
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 14
+; CHECK-NEXT: store i8 0, ptr [[TMP39]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; CHECK: [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[TMP9]], i32 15
+; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]]
+; CHECK: [[PRED_STORE_IF29]]:
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x ptr> [[PREDPHI]], i32 15
+; CHECK-NEXT: store i8 0, ptr [[TMP41]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; CHECK: [[PRED_STORE_CONTINUE30]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[CONTINUE:.*]]
+; CHECK: [[CONTINUE]]:
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp slt i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP2_NOT]], label %[[THEN:.*]], label %[[THEN_2:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: br i1 false, label %[[THEN_2]], label %[[LOOP_LATCH]]
+; CHECK: [[THEN_2]]:
+; CHECK-NEXT: [[P:%.*]] = phi ptr [ null, %[[CONTINUE]] ], [ [[DST]], %[[THEN]] ]
+; CHECK-NEXT: store i8 0, ptr [[P]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: br label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header: ; preds = %loop.latch, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %ec = icmp eq i64 %iv, %N
+ br i1 %ec, label %exit, label %continue
+
+continue:
+ %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+ %0 = load i32, ptr %gep.src, align 4
+ %cmp2.not = icmp slt i32 %0, 0
+ br i1 %cmp2.not, label %then, label %then.2
+
+then:
+ br i1 false, label %then.2, label %loop.latch
+
+then.2:
+ %p = phi ptr [ null, %continue ], [ %dst, %then ]
+ store i8 0, ptr %p, align 1
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ br label %loop.header
+
+exit:
+ ret void
+}
+
+define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i1 %c.0) {
+; CHECK-LABEL: define void @test_blend_feeding_replicated_store_2(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i1 [[C_0:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[C_0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i1> [[TMP6]], [[TMP3]]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> zeroinitializer, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
+; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
+; CHECK: [[PRED_STORE_CONTINUE]]:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP7]], i32 1
+; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; CHECK: [[PRED_STORE_IF1]]:
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1
+; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
+; CHECK: [[PRED_STORE_CONTINUE2]]:
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP7]], i32 2
+; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; CHECK: [[PRED_STORE_IF3]]:
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2
+; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
+; CHECK: [[PRED_STORE_CONTINUE4]]:
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP7]], i32 3
+; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3
+; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP21]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
+; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP7]], i32 4
+; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; CHECK: [[PRED_STORE_IF7]]:
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4
+; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP25]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
+; CHECK: [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP7]], i32 5
+; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; CHECK: [[PRED_STORE_IF9]]:
+; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
+; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP28]]
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5
+; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP29]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
+; CHECK: [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP7]], i32 6
+; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; CHECK: [[PRED_STORE_IF11]]:
+; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6
+; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP33]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
+; CHECK: [[PRED_STORE_CONTINUE12]]:
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP7]], i32 7
+; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
+; CHECK: [[PRED_STORE_IF13]]:
+; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
+; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP36]]
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7
+; CHECK-NEXT: store i8 [[TMP38]], ptr [[TMP37]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE14]]
+; CHECK: [[PRED_STORE_CONTINUE14]]:
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP7]], i32 8
+; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
+; CHECK: [[PRED_STORE_IF15]]:
+; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
+; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP40]]
+; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8
+; CHECK-NEXT: store i8 [[TMP42]], ptr [[TMP41]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE16]]
+; CHECK: [[PRED_STORE_CONTINUE16]]:
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP7]], i32 9
+; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
+; CHECK: [[PRED_STORE_IF17]]:
+; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
+; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9
+; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP45]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
+; CHECK: [[PRED_STORE_CONTINUE18]]:
+; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP7]], i32 10
+; CHECK-NEXT: br i1 [[TMP47]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
+; CHECK: [[PRED_STORE_IF19]]:
+; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
+; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP48]]
+; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10
+; CHECK-NEXT: store i8 [[TMP50]], ptr [[TMP49]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
+; CHECK: [[PRED_STORE_CONTINUE20]]:
+; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP7]], i32 11
+; CHECK-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
+; CHECK: [[PRED_STORE_IF21]]:
+; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
+; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP52]]
+; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11
+; CHECK-NEXT: store i8 [[TMP54]], ptr [[TMP53]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
+; CHECK: [[PRED_STORE_CONTINUE22]]:
+; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[TMP7]], i32 12
+; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
+; CHECK: [[PRED_STORE_IF23]]:
+; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
+; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP56]]
+; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12
+; CHECK-NEXT: store i8 [[TMP58]], ptr [[TMP57]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
+; CHECK: [[PRED_STORE_CONTINUE24]]:
+; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP7]], i32 13
+; CHECK-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
+; CHECK: [[PRED_STORE_IF25]]:
+; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
+; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP60]]
+; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13
+; CHECK-NEXT: store i8 [[TMP62]], ptr [[TMP61]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
+; CHECK: [[PRED_STORE_CONTINUE26]]:
+; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP7]], i32 14
+; CHECK-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
+; CHECK: [[PRED_STORE_IF27]]:
+; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
+; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP64]]
+; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14
+; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP65]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
+; CHECK: [[PRED_STORE_CONTINUE28]]:
+; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15
+; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]]
+; CHECK: [[PRED_STORE_IF29]]:
+; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
+; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP68]]
+; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
+; CHECK-NEXT: store i8 [[TMP70]], ptr [[TMP69]], align 1
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
+; CHECK: [[PRED_STORE_CONTINUE30]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
+; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV1]]
+; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
+; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0
+; CHECK-NEXT: br i1 [[C_1]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br i1 [[C_0]], label %[[LOOP_LATCH]], label %[[THEN]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[P:%.*]] = phi i8 [ 1, %[[LOOP_HEADER]] ], [ 0, %[[ELSE]] ]
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV1]]
+; CHECK-NEXT: store i8 [[P]], ptr [[GEP_DST]], align 1
+; CHECK-NEXT: br label %[[LOOP_LATCH]]
+; CHECK: [[LOOP_LATCH]]:
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.src = getelementptr inbounds i8, ptr %src, i32 %iv
+ %l = load i8, ptr %gep.src, align 1
+ %c.1 = icmp eq i8 %l, 0
+ br i1 %c.1, label %then, label %else
+
+else:
+ br i1 %c.0, label %loop.latch, label %then
+
+then:
+ %p = phi i8 [ 1, %loop.header ], [ 0, %else ]
+ %gep.dst = getelementptr inbounds i8, ptr %dst, i32 %iv
+ store i8 %p, ptr %gep.dst, align 1
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i32 %iv, 1
+ %ec = icmp eq i32 %iv.next, 100
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @test_blend_feeding_replicated_store_3(ptr noalias %src.1, ptr noalias %src.2, ptr noalias %dst, i32 %x, i64 %N, i1 %c.2) {
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
+ %l.1 = load i8, ptr %src.1, align 1
+ %ext = zext i8 %l.1 to i32
+ %mul = mul i32 %x, %ext
+ %div = sdiv i32 %mul, 255
+ %l.2 = load i8, ptr %src.2, align 1
+ %c.1 = icmp eq i8 %l.2, 0
+ br i1 %c.1, label %then, label %else.1
+
+else.1:
+ br i1 %c.2, label %loop.latch, label %else.2
+
+else.2:
+ %trunc.div = trunc i32 %div to i8
+ br label %then
+
+then:
+ %p = phi i8 [ 0, %loop.header ], [ %trunc.div, %else.2 ]
+ store i8 %p, ptr %dst, align 1
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, %N
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
index 2e477b326b7384..14e29e0d0518a1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll
@@ -23,10 +23,10 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_UREM_IF:.*]], label %[[PRED_UREM_CONTINUE:.*]]
; CHECK: [[PRED_UREM_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = urem i64 [[MUL]], [[X]]
+; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE]]
; CHECK: [[PRED_UREM_CONTINUE]]:
-; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_UREM_IF]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[REM]], %[[PRED_UREM_IF]] ]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_UREM_IF1:.*]], label %[[PRED_UREM_CONTINUE2:.*]]
; CHECK: [[PRED_UREM_IF1]]:
@@ -49,14 +49,14 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-NEXT: [[TMP12:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP4]], i64 0)
; CHECK-NEXT: [[TMP13:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP9]], i64 0)
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1
+; CHECK-NEXT: [[P:%.*]] = select i1 [[TMP14]], i64 [[TMP12]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[TMP15]], i64 [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[PREDPHI]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[PREDPHI7]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP16]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT: store i64 0, ptr [[TMP18]], align 8
+; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
; CHECK-NEXT: store i64 0, ptr [[TMP19]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
@@ -67,19 +67,19 @@ define void @smax_call_uniform(ptr %dst, i64 %x) {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]]
; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[REM:%.*]] = urem i64 [[MUL]], [[X]]
-; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM]], i64 0)
+; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]]
+; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM1]], i64 0)
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[P:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
-; CHECK-NEXT: [[ADD:%.*]] = add i64 [[P]], 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[DST]], i64 [[ADD]]
-; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]]
+; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8
+; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list