[llvm] [VPlan] Explicitly replicate VPInstructions by VF. (PR #155102)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 23 07:30:15 PDT 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/155102
Extend replicateByVF, added in #142433 (aa240293190), to also explicitly unroll replicating VPInstructions by VF.
After this change, the only remaining case where we implicitly replicate for all lanes is VPReplicateRecipes inside replicate regions.
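To illustrate, here is a rough sketch of the effect for a replicating ptradd VPInstruction at VF=2, adapted from the vplan-predicate-switch.ll test updated below (value names are schematic). Before, a single recipe implicitly produces a value per lane:

  EMIT vp<%ptr> = ptradd ir<%start>, vp<%steps>

After, each lane gets an explicit single-scalar clone, and vector users are served by an explicit buildvector, while users that only need lane 0 keep using the first clone directly:

  EMIT vp<%step0> = extractelement vp<%steps>, ir<0>
  EMIT vp<%ptr> = ptradd ir<%start>, vp<%step0>
  EMIT vp<%step1> = extractelement vp<%steps>, ir<1>
  EMIT vp<%ptr>.1 = ptradd ir<%start>, vp<%step1>
  EMIT vp<%ptr.vec> = buildvector vp<%ptr>, vp<%ptr>.1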
From 878d79eb09730e40cb6a57bb5bf91a6bbde000f7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 23 Aug 2025 15:29:15 +0100
Subject: [PATCH] [VPlan] Explicitly replicate VPInstructions by VF.
Extend replicateByVF, added in #142433 (aa240293190), to also
explicitly unroll replicating VPInstructions by VF.
After this change, the only remaining case where we implicitly
replicate for all lanes is VPReplicateRecipes inside replicate regions.
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 26 ++-----
llvm/lib/Transforms/Vectorize/VPlan.h | 21 +++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 30 ++------
.../Transforms/Vectorize/VPlanTransforms.cpp | 32 +++++----
.../Transforms/Vectorize/VPlanTransforms.h | 4 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 69 +++++++++++--------
.../LoopVectorize/pointer-induction.ll | 7 +-
.../LoopVectorize/predicate-switch.ll | 26 ++++---
.../LoopVectorize/vplan-predicate-switch.ll | 12 ++--
9 files changed, 110 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f972efa07eb7e..798c6b0771dc8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -333,6 +333,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
+ assert(IsSingleScalar && "must be a single-scalar at this point");
+ // We need to construct the vector value for a single-scalar value by
+ // broadcasting the scalar to all lanes.
auto *LastInst = cast<Instruction>(get(Def, LastLane));
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
@@ -343,27 +346,8 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
: std::next(BasicBlock::iterator(LastInst));
Builder.SetInsertPoint(&*NewIP);
- // However, if we are vectorizing, we need to construct the vector values.
- // If the value is known to be uniform after vectorization, we can just
- // broadcast the scalar value corresponding to lane zero. Otherwise, we
- // construct the vector values using insertelement instructions. Since the
- // resulting vectors are stored in State, we will only generate the
- // insertelements once.
- Value *VectorValue = nullptr;
- if (IsSingleScalar) {
- VectorValue = GetBroadcastInstrs(ScalarValue);
- set(Def, VectorValue);
- } else {
- assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- assert(isa<VPInstruction>(Def) &&
- "Explicit BuildVector recipes must have"
- "handled packing for non-VPInstructions.");
- // Initialize packing with insertelements to start from poison.
- VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
- for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
- VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
- set(Def, VectorValue);
- }
+ Value *VectorValue = GetBroadcastInstrs(ScalarValue);
+ set(Def, VectorValue);
Builder.restoreIP(OldIP);
return VectorValue;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6814dc5de6716..7412f0a8d5bf2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -897,6 +897,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}
+ VPRecipeWithIRFlags *clone() override = 0;
+
void execute(VPTransformState &State) override = 0;
/// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
@@ -1040,13 +1042,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
VScale,
};
-private:
- typedef unsigned char OpcodeTy;
- OpcodeTy Opcode;
-
- /// An optional name that can be used for the generated IR instruction.
- const std::string Name;
-
/// Returns true if this VPInstruction generates scalar values for all lanes.
/// Most VPInstructions generate a single value per part, either vector or
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
@@ -1055,6 +1050,13 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// underlying ingredient.
bool doesGeneratePerAllLanes() const;
+private:
+ typedef unsigned char OpcodeTy;
+ OpcodeTy Opcode;
+
+ /// An optional name that can be used for the generated IR instruction.
+ const std::string Name;
+
/// Returns true if we can generate a scalar for the first lane only if
/// needed.
bool canGenerateScalarForFirstLane() const;
@@ -1064,11 +1066,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// existing value is returned rather than a generated one.
Value *generate(VPTransformState &State);
- /// Utility methods serving execute(): generates a scalar single instance of
- /// the modeled instruction for a given lane. \returns the scalar generated
- /// value for lane \p Lane.
- Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
-
#if !defined(NDEBUG)
/// Return the number of operands determined by the opcode of the
/// VPInstruction. Returns -1u if the number of operands cannot be determined
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a92540f457c73..67156f0e54987 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -525,16 +525,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
}
}
-Value *VPInstruction::generatePerLane(VPTransformState &State,
- const VPLane &Lane) {
- IRBuilderBase &Builder = State.Builder;
-
- assert(getOpcode() == VPInstruction::PtrAdd &&
- "only PtrAdd opcodes are supported for now");
- return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
- State.get(getOperand(1), Lane), Name);
-}
-
/// Create a conditional branch using \p Cond branching to the successors of \p
/// VPBB. Note that the first successor is always forward (i.e. not created yet)
/// while the second successor may already have been created (if it is a header
@@ -1123,24 +1113,13 @@ void VPInstruction::execute(VPTransformState &State) {
"Set flags not supported for the provided opcode");
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
- bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
- (vputils::onlyFirstLaneUsed(this) ||
- isVectorToScalar() || isSingleScalar());
- bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
- if (GeneratesPerAllLanes) {
- for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
- Lane != NumLanes; ++Lane) {
- Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
- assert(GeneratedValue && "generatePerLane must produce a value");
- State.set(this, GeneratedValue, VPLane(Lane));
- }
- return;
- }
-
Value *GeneratedValue = generate(State);
if (!hasResult())
return;
assert(GeneratedValue && "generate must produce a value");
+ bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
+ (vputils::onlyFirstLaneUsed(this) ||
+ isVectorToScalar() || isSingleScalar());
assert((((GeneratedValue->getType()->isVectorTy() ||
GeneratedValue->getType()->isStructTy()) ==
!GeneratesPerFirstLaneOnly) ||
@@ -1213,6 +1192,9 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::Broadcast:
case VPInstruction::ReductionStartVector:
return true;
+ case VPInstruction::BuildStructVector:
+ case VPInstruction::BuildVector:
+ return getNumOperands() > 1;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 677d97e54d556..c4d6a3826a4a0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3354,34 +3354,40 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
vp_depth_first_shallow(Plan.getEntry()));
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
- // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
- // excluding ones in replicate regions. Those are not materialized explicitly
- // yet. Those vector users are still handled in VPReplicateRegion::execute(),
- // via shouldPack().
+ // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
+ // VPInstructions, excluding ones in replicate regions. Those are not
+ materialized explicitly yet; their vector users are still handled in
+ // VPReplicateRegion::execute(), via shouldPack().
// TODO: materialize build vectors for replicating recipes in replicating
// regions.
- // TODO: materialize build vectors for VPInstructions.
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
+ auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
+ if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR))
+ continue;
+ auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
- return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
+ return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
};
- if (!RepR || RepR->isSingleScalar() ||
- none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
+ if ((isa<VPReplicateRecipe>(DefR) &&
+ cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
+ (isa<VPInstruction>(DefR) &&
+ !cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()) ||
+ vputils::onlyFirstLaneUsed(DefR) ||
+ none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
continue;
- Type *ScalarTy = TypeInfo.inferScalarType(RepR);
+ Type *ScalarTy = TypeInfo.inferScalarType(DefR);
unsigned Opcode = ScalarTy->isStructTy()
? VPInstruction::BuildStructVector
: VPInstruction::BuildVector;
- auto *BuildVector = new VPInstruction(Opcode, {RepR});
- BuildVector->insertAfter(RepR);
+ auto *BuildVector = new VPInstruction(Opcode, {DefR});
+ BuildVector->insertAfter(DefR);
- RepR->replaceUsesWithIf(
+ DefR->replaceUsesWithIf(
BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
VPUser &U, unsigned) {
return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index def46b5d6d75c..d9952e2fc180f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -120,8 +120,8 @@ struct VPlanTransforms {
/// Explicitly unroll \p Plan by \p UF.
static void unrollByUF(VPlan &Plan, unsigned UF);
- /// Replace each VPReplicateRecipe outside on any replicate region in \p Plan
- /// with \p VF single-scalar recipes.
+ /// Replace each VPReplicateRecipe and replicating VPInstruction outside of
+ /// any replicate region in \p Plan with \p VF single-scalar recipes.
/// TODO: Also replicate VPReplicateRecipes inside replicate regions, thereby
/// dissolving the latter.
static void replicateByVF(VPlan &Plan, ElementCount VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index d47d76d203738..95bdc1383afee 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -464,15 +464,15 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
VPlanTransforms::removeDeadRecipes(Plan);
}
-/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
-/// Def2LaneDefs to look up scalar definitions for operands of \RepR.
-static VPReplicateRecipe *
+/// Create a single-scalar clone of \p DefR for lane \p Lane. Use \p
+/// Def2LaneDefs to look up scalar definitions for operands of \p DefR.
+static VPRecipeWithIRFlags *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
- VPReplicateRecipe *RepR, VPLane Lane,
+ VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
- for (VPValue *Op : RepR->operands()) {
+ for (VPValue *Op : DefR->operands()) {
// If Op is a definition that has been unrolled, directly use the clone for
// the corresponding lane.
auto LaneDefs = Def2LaneDefs.find(Op);
@@ -502,11 +502,19 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
NewOps.push_back(Ext);
}
- auto *New =
- new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
- /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
- New->transferFlags(*RepR);
- New->insertBefore(RepR);
+ VPRecipeWithIRFlags *New;
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) {
+ New =
+ new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
+ /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
+ } else {
+ New = DefR->clone();
+ for (const auto &[Idx, Op] : enumerate(NewOps))
+ New->setOperand(Idx, Op);
+ }
+ New->transferFlags(*DefR);
+ New->insertBefore(DefR);
return New;
}
@@ -531,41 +539,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR || RepR->isSingleScalar())
+ auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R);
+ if (!DefR || !isa<VPInstruction, VPReplicateRecipe>(DefR))
+ continue;
+ if ((isa<VPReplicateRecipe>(DefR) &&
+ cast<VPReplicateRecipe>(DefR)->isSingleScalar()) ||
+ (isa<VPInstruction>(DefR) &&
+ !cast<VPInstruction>(DefR)->doesGeneratePerAllLanes()))
continue;
- VPBuilder Builder(RepR);
- if (RepR->getNumUsers() == 0) {
- if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
- vputils::isSingleScalar(RepR->getOperand(1))) {
+ VPBuilder Builder(DefR);
+ if (DefR->getNumUsers() == 0) {
+ if (isa<StoreInst>(DefR->getUnderlyingInstr()) &&
+ vputils::isSingleScalar(DefR->getOperand(1))) {
// Stores to invariant addresses need to store the last lane only.
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
+ cloneForLane(Plan, Builder, IdxTy, DefR, VPLane::getLastLaneForVF(VF),
Def2LaneDefs);
} else {
- // Create single-scalar version of RepR for all lanes.
+ // Create single-scalar version of DefR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
+ cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
}
- RepR->eraseFromParent();
+ DefR->eraseFromParent();
continue;
}
- /// Create single-scalar version of RepR for all lanes.
+ // Create single-scalar version of DefR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
LaneDefs.push_back(
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
+ cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs));
- Def2LaneDefs[RepR] = LaneDefs;
+ Def2LaneDefs[DefR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
- RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
- return U.onlyFirstLaneUsed(RepR);
+ DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
+ return U.onlyFirstLaneUsed(DefR);
});
- // Update each build vector user that currently has RepR as its only
+ // Update each build vector user that currently has DefR as its only
// operand, to have all LaneDefs as its operands.
- for (VPUser *U : to_vector(RepR->users())) {
+ for (VPUser *U : to_vector(DefR->users())) {
auto *VPI = dyn_cast<VPInstruction>(U);
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
@@ -577,7 +590,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
for (VPValue *LaneDef : drop_begin(LaneDefs))
VPI->addOperand(LaneDef);
}
- ToRemove.push_back(RepR);
+ ToRemove.push_back(DefR);
}
}
for (auto *R : reverse(ToRemove))
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d2c53f47a6670..a633dfee066ed 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -33,6 +33,10 @@ define void @a(ptr readnone %b) {
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
@@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) {
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]]
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
-; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
-; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
-; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]]
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index f59d4aa99918f..f33e12b418fa6 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -22,6 +22,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC1-NEXT: [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
; IC1-NEXT: [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
@@ -117,8 +119,12 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
+; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
; IC2-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
+; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
+; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
@@ -338,21 +344,21 @@ define void @switch_to_header(ptr %start) {
; IC1-NEXT: [[ENTRY:.*]]:
; IC1-NEXT: br label %[[LOOP_HEADER:.*]]
; IC1: [[LOOP_HEADER]]:
-; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
+; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC1-NEXT: i64 120, label %[[IF_THEN]]
+; IC1-NEXT: i64 120, label %[[IF_THEN1]]
; IC1-NEXT: i64 100, label %[[LOOP_LATCH]]
; IC1-NEXT: ]
-; IC1: [[IF_THEN]]:
+; IC1: [[IF_THEN1]]:
; IC1-NEXT: br label %[[LOOP_HEADER]]
-; IC1: [[IF_THEN1:.*:]]
+; IC1: [[IF_THEN:.*:]]
; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
; IC1-NEXT: store i64 42, ptr [[GEP]], align 1
; IC1-NEXT: unreachable
; IC1: [[LOOP_LATCH]]:
; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
+; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
; IC1: [[EXIT]]:
; IC1-NEXT: ret void
;
@@ -361,21 +367,21 @@ define void @switch_to_header(ptr %start) {
; IC2-NEXT: [[ENTRY:.*]]:
; IC2-NEXT: br label %[[LOOP_HEADER:.*]]
; IC2: [[LOOP_HEADER]]:
-; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
+; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC2-NEXT: i64 120, label %[[IF_THEN]]
+; IC2-NEXT: i64 120, label %[[IF_THEN1]]
; IC2-NEXT: i64 100, label %[[LOOP_LATCH]]
; IC2-NEXT: ]
-; IC2: [[IF_THEN]]:
+; IC2: [[IF_THEN1]]:
; IC2-NEXT: br label %[[LOOP_HEADER]]
-; IC2: [[IF_THEN1:.*:]]
+; IC2: [[IF_THEN:.*:]]
; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
; IC2-NEXT: store i64 42, ptr [[GEP]], align 1
; IC2-NEXT: unreachable
; IC2: [[LOOP_LATCH]]:
; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
+; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
; IC2: [[EXIT]]:
; IC2-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
index 32aadcfb164e6..3abad7f6a43df 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll
@@ -17,7 +17,11 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
-; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
+; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
+; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
+; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
; CHECK-NEXT: EMIT vp<[[C2:%.+]]> = icmp eq ir<%l>, ir<13>
@@ -31,7 +35,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR]]>
+; CHECK-NEXT: REPLICATE store ir<0>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
@@ -48,7 +52,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR]]>
+; CHECK-NEXT: REPLICATE store ir<42>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
@@ -65,7 +69,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
-; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR]]>
+; CHECK-NEXT: REPLICATE store ir<2>, vp<[[PTR_VEC]]>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue: