[llvm] [VPlan] Introduce VPInstructionWithType, use instead of VPScalarCast (NFC) (PR #129706)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 08:06:29 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/129706
>From 29a14c1d2181297dd98cbabb5900cdc490f31726 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 3 Mar 2025 22:43:28 +0000
Subject: [PATCH 1/5] [VPlan] Introduce VPInstructionWithType, use instead of
VPScalarCast (NFC)
Some opcodes, including casts, currently require specialized recipes
because their result type is not implied by their operands.
This leads to duplication, as each such opcode needs its own full
recipe definition.
This patch introduces a new VPInstructionWithType subclass of
VPInstruction that additionally stores the result type. The general
idea is that opcodes which need to specify a result type use this
general recipe. The current patch replaces VPScalarCastRecipe with
VPInstructionWithType; a similar patch for VPWidenCastRecipe will
follow soon. A minimal sketch of the identification pattern this
relies on follows the list below.
A few proposed opcodes should also benefit, without needing
workarounds:
* https://github.com/llvm/llvm-project/pull/129508
* https://github.com/llvm/llvm-project/pull/119284
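For readers skimming the diff: the interesting design point is that
VPInstructionWithType does not get its own VPDefID; its classof keys on
the opcode (Instruction::isCast) instead. Below is a minimal standalone
sketch of that pattern, assuming only LLVM's header-only Casting.h;
Recipe and RecipeWithType are hypothetical stand-ins for illustration,
not the real VPlan classes.

// Sketch: opcode-based classof, analogous to VPInstructionWithType.
// Recipe/RecipeWithType are hypothetical; llvm/Support/Casting.h is real.
#include "llvm/Support/Casting.h"
#include <cassert>

enum Opcode { Add, Trunc, ZExt }; // stand-ins for Instruction opcodes
static bool isCastOpcode(Opcode Op) { return Op == Trunc || Op == ZExt; }

class Recipe {
  Opcode Op;

public:
  explicit Recipe(Opcode Op) : Op(Op) {}
  virtual ~Recipe() = default;
  Opcode getOpcode() const { return Op; }
};

class RecipeWithType : public Recipe {
  unsigned ResultBits; // stand-in for Type *ResultTy

public:
  RecipeWithType(Opcode Op, unsigned ResultBits)
      : Recipe(Op), ResultBits(ResultBits) {
    assert(isCastOpcode(Op) && "only cast opcodes carry an explicit type");
  }
  unsigned getResultBits() const { return ResultBits; }

  // classof keys on a property (the opcode) rather than a class tag, so no
  // dedicated VPDefID-style enumerator is needed. The invariant: every
  // recipe constructed with a cast opcode is a RecipeWithType.
  static bool classof(const Recipe *R) { return isCastOpcode(R->getOpcode()); }
};

int main() {
  RecipeWithType Cast(ZExt, 64);
  Recipe *R = &Cast;
  assert(llvm::isa<RecipeWithType>(R)); // true: opcode is a cast
  assert(llvm::cast<RecipeWithType>(R)->getResultBits() == 64);
}

The trade-off mirrors the patch: the scheme is only sound if every
recipe created with a cast opcode really is the typed subclass, which
is why the final fixup below hoists the check into a shared
VPInstruction::isCast helper used by both classof and
preparePlanForEpilogueVectorLoop.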
---
.../Vectorize/LoopVectorizationPlanner.h | 6 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 5 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 99 ++++++++++---------
.../Transforms/Vectorize/VPlanAnalysis.cpp | 7 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 72 +++++++-------
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 11 ++-
llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 -
.../Transforms/Vectorize/VPlanVerifier.cpp | 4 +-
.../RISCV/vplan-vp-call-intrinsics.ll | 18 ++--
.../RISCV/vplan-vp-cast-intrinsics.ll | 20 ++--
...an-vp-intrinsics-fixed-order-recurrence.ll | 4 +-
.../RISCV/vplan-vp-intrinsics-reduction.ll | 4 +-
.../RISCV/vplan-vp-intrinsics.ll | 2 +-
.../RISCV/vplan-vp-select-intrinsics.ll | 2 +-
.../interleave-and-scalarize-only.ll | 4 +-
15 files changed, 135 insertions(+), 124 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ed3e45dd2c6c8..45e4fcad01b6c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -246,10 +246,10 @@ class VPBuilder {
new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
}
- VPScalarCastRecipe *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy, DebugLoc DL) {
+ VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
+ Type *ResultTy, DebugLoc DL) {
return tryInsertInstruction(
- new VPScalarCastRecipe(Opcode, Op, ResultTy, DL));
+ new VPInstructionWithType(Opcode, Op, ResultTy, DL));
}
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cb860a472d8f7..1f15b96cd6518 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4502,7 +4502,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
switch (R.getVPDefID()) {
case VPDef::VPDerivedIVSC:
case VPDef::VPScalarIVStepsSC:
- case VPDef::VPScalarCastSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
@@ -10396,8 +10395,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
assert(all_of(IV->users(),
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
- isa<VPScalarCastRecipe>(U) ||
isa<VPDerivedIVRecipe>(U) ||
+ Instruction::isCast(
+ cast<VPInstruction>(U)->getOpcode()) ||
+
cast<VPInstruction>(U)->getOpcode() ==
Instruction::Add;
}) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b1288c42b20f2..4ae34e3e4d552 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -533,7 +533,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
- case VPRecipeBase::VPScalarCastSC:
case VPRecipeBase::VPScalarPHISC:
case VPRecipeBase::VPPartialReductionSC:
return true;
@@ -1026,6 +1025,56 @@ class VPInstruction : public VPRecipeWithIRFlags,
StringRef getName() const { return Name; }
};
+/// A specialization of VPInstruction augmenting it with a dedicated result
+/// type, to be used when the opcode and operands of the VPInstruction don't
+/// directly determine the result type.
+class VPInstructionWithType : public VPInstruction {
+ /// Scalar result type produced by the recipe.
+ Type *ResultTy;
+
+ Value *generate(VPTransformState &State);
+
+public:
+ VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
+ Type *ResultTy, DebugLoc DL, const Twine &Name = "")
+ : VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {}
+
+ static inline bool classof(const VPRecipeBase *R) {
+ auto *VPI = dyn_cast<VPInstruction>(R);
+ return VPI && Instruction::isCast(VPI->getOpcode());
+ }
+
+ static inline bool classof(const VPUser *R) {
+ return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));
+ }
+
+ VPInstruction *clone() override {
+ auto *New =
+ new VPInstructionWithType(getOpcode(), {getOperand(0)}, getResultType(),
+ getDebugLoc(), getName());
+ New->setUnderlyingValue(getUnderlyingValue());
+ return New;
+ }
+
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPIRInstruction.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override {
+ return 0;
+ }
+
+ Type *getResultType() const { return ResultTy; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override;
+};
+
/// A recipe to wrap on original IR instruction not to be modified during
/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed,
/// and it is used to add a new incoming value for the single predecessor VPBB.
@@ -1183,54 +1232,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
-/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
-class VPScalarCastRecipe : public VPSingleDefRecipe {
- Instruction::CastOps Opcode;
-
- Type *ResultTy;
-
- Value *generate(VPTransformState &State);
-
-public:
- VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- DebugLoc DL)
- : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
- ResultTy(ResultTy) {}
-
- ~VPScalarCastRecipe() override = default;
-
- VPScalarCastRecipe *clone() override {
- return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
- getDebugLoc());
- }
-
- VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
-
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPScalarCastRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // TODO: Compute accurate cost after retiring the legacy cost model.
- return 0;
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- /// Returns the result type of the cast.
- Type *getResultType() const { return ResultTy; }
-
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- // At the moment, only uniform codegen is implemented.
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-};
-
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 6f6875f0e5e0e..bc81c6d1862d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -252,20 +252,17 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
+ .Case<VPInstructionWithType, VPWidenIntrinsicRecipe>(
+ [](const auto *R) { return R->getResultType(); })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
- .Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
- return R->getResultType();
- })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.
return V->getUnderlyingValue()->getType();
})
.Case<VPWidenCastRecipe>(
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
- .Case<VPScalarCastRecipe>(
- [](const VPScalarCastRecipe *R) { return R->getResultType(); })
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
return R->getSCEV()->getType();
})
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d154d54c37862..ecdb5e6788bc9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -148,7 +148,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
switch (getVPDefID()) {
case VPDerivedIVSC:
case VPPredInstPHISC:
- case VPScalarCastSC:
case VPReverseVectorPointerSC:
return false;
case VPInstructionSC:
@@ -413,7 +412,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const {
}
bool VPInstruction::canGenerateScalarForFirstLane() const {
- if (Instruction::isBinaryOp(getOpcode()))
+ if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return true;
if (isSingleScalar() || isVectorToScalar())
return true;
@@ -961,6 +960,43 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
#endif
+Value *VPInstructionWithType::generate(VPTransformState &State) {
+ State.setDebugLocFrom(getDebugLoc());
+ assert(vputils::onlyFirstLaneUsed(this) &&
+ "Codegen only implemented for first lane.");
+ switch (getOpcode()) {
+ case Instruction::SExt:
+ case Instruction::ZExt:
+ case Instruction::Trunc: {
+ // Note: SExt/ZExt not used yet.
+ Value *Op = State.get(getOperand(0), VPLane(0));
+ return State.Builder.CreateCast(Instruction::CastOps(getOpcode()), Op,
+ ResultTy);
+ }
+ default:
+ llvm_unreachable("opcode not implemented yet");
+ }
+}
+
+void VPInstructionWithType::execute(VPTransformState &State) {
+ State.set(this, generate(State), VPLane(0));
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "EMIT ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << Instruction::getOpcodeName(getOpcode()) << " ";
+ printOperands(O, SlotTracker);
+ O << " to " << *ResultTy;
+}
+#endif
+
+bool VPInstructionWithType::onlyFirstLaneUsed(const VPValue *Op) const {
+ return vputils::onlyFirstLaneUsed(this);
+}
+
void VPIRInstruction::execute(VPTransformState &State) {
assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
"Only PHINodes can have extra operands");
@@ -2436,38 +2472,6 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
- State.setDebugLocFrom(getDebugLoc());
- assert(vputils::onlyFirstLaneUsed(this) &&
- "Codegen only implemented for first lane.");
- switch (Opcode) {
- case Instruction::SExt:
- case Instruction::ZExt:
- case Instruction::Trunc: {
- // Note: SExt/ZExt not used yet.
- Value *Op = State.get(getOperand(0), VPLane(0));
- return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
- }
- default:
- llvm_unreachable("opcode not implemented yet");
- }
-}
-
-void VPScalarCastRecipe ::execute(VPTransformState &State) {
- State.set(this, generate(State), VPLane(0));
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const {
- O << Indent << "SCALAR-CAST ";
- printAsOperand(O, SlotTracker);
- O << " = " << Instruction::getOpcodeName(Opcode) << " ";
- printOperands(O, SlotTracker);
- O << " to " << *ResultTy;
-}
-#endif
-
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
assert(State.Lane && "Branch on Mask works only on single instance.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 1a7322ec0aff6..e13ed0f6d986e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -113,7 +113,16 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
all_of(R->operands(),
[](VPValue *Op) { return isUniformAcrossVFsAndUFs(Op); });
})
- .Case<VPScalarCastRecipe, VPWidenCastRecipe>([](const auto *R) {
+ .Case<VPInstruction>([](const auto *VPI) {
+ return Instruction::isCast(VPI->getOpcode())
+ ? all_of(VPI->operands(),
+ [](VPValue *Op) {
+ return isUniformAcrossVFsAndUFs(Op);
+ })
+ : false;
+ })
+
+ .Case<VPWidenCastRecipe>([](const auto *R) {
// A cast is uniform according to its operand.
return isUniformAcrossVFsAndUFs(R->getOperand(0));
})
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0a59b137bbd79..1777ab0b71093 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -332,7 +332,6 @@ class VPDef {
VPReductionSC,
VPPartialReductionSC,
VPReplicateSC,
- VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
VPReverseVectorPointerSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 1b3b69ea6a13d..882323d5a1e4f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -146,8 +146,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe,
VPScalarPHIRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
- .Case<VPScalarCastRecipe>(
- [&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
+ .Case<VPInstructionWithType>(
+ [&](const auto *S) { return VerifyEVLUse(*S, 0); })
.Case<VPInstruction>([&](const VPInstruction *I) {
if (I->getOpcode() != Instruction::Add) {
errs() << "EVL is used as an operand in non-VPInstruction::Add\n";
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
index a213608857728..c81f650c338ed 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
@@ -34,7 +34,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -90,7 +90,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -146,7 +146,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -202,7 +202,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -255,7 +255,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -306,7 +306,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -359,7 +359,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -414,7 +414,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -467,7 +467,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
index b8f20aa670b5c..8886bfcd1652b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -30,7 +30,7 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -83,7 +83,7 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ZEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -134,7 +134,7 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -185,7 +185,7 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPEXT]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -236,7 +236,7 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTRUNC]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -287,7 +287,7 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[SITOFP]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -338,7 +338,7 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[UITOFP]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -389,7 +389,7 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOSI]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -440,7 +440,7 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[FPTOUI]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -491,7 +491,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[INTTOPTR]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index 0bcfe13832ae7..75f2499f935d4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -19,7 +19,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: Successor(s): vector.ph
; IF-EVL-EMPTY:
; IF-EVL: vector.ph:
-; IF-EVL-NEXT: SCALAR-CAST vp<[[VF32:%[0-9]+]]> = trunc vp<[[VF]]> to i32
+; IF-EVL-NEXT: EMIT vp<[[VF32:%[0-9]+]]> = trunc vp<[[VF]]> to i32
; IF-EVL-NEXT: Successor(s): vector loop
; IF-EVL-EMPTY:
; IF-EVL: <x1> vector loop: {
@@ -39,7 +39,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 3594b3f047363..04eff34854806 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -51,7 +51,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
-; IF-EVL-OUTLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-OUTLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
@@ -102,7 +102,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
-; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
index 4cef3f029e20f..3b5ae7811a83b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -39,7 +39,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
index 67be80393d829..adea9ca03feb2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -50,7 +50,7 @@
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
- ; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+ ; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT]]> = add vp<[[IV]]>, ir<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, ir<[[VTC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index c3164762e8130..e98c3fd6b593d 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -202,14 +202,14 @@ exit:
; DBG-NEXT: Successor(s): vector.ph
; DBG-EMPTY:
; DBG-NEXT: vector.ph:
-; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
+; DBG-NEXT: EMIT vp<[[CAST:%.+]]> = trunc ir<1> to i32
; DBG-NEXT: Successor(s): vector loop
; DBG-EMPTY:
; DBG-NEXT: <x1> vector loop: {
; DBG-NEXT: vector.body:
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
-; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
+; DBG-NEXT: EMIT vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
>From 060d0945ac181ba64e46bc59c66680a10872319c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 4 Mar 2025 14:25:23 +0000
Subject: [PATCH 2/5] !fixup remove some stray newlines and unneeded functions.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 1 -
llvm/lib/Transforms/Vectorize/VPlan.h | 3 +--
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 +-----
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 1 -
4 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1f15b96cd6518..6ad7d5a07517e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10398,7 +10398,6 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
isa<VPDerivedIVRecipe>(U) ||
Instruction::isCast(
cast<VPInstruction>(U)->getOpcode()) ||
-
cast<VPInstruction>(U)->getOpcode() ==
Instruction::Add;
}) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4ae34e3e4d552..b06cea2d71e3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1061,6 +1061,7 @@ class VPInstructionWithType : public VPInstruction {
/// Return the cost of this VPIRInstruction.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {
+ // TODO: Compute accurate cost after retiring the legacy cost model.
return 0;
}
@@ -1071,8 +1072,6 @@ class VPInstructionWithType : public VPInstruction {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
-
- bool onlyFirstLaneUsed(const VPValue *Op) const override;
};
/// A recipe to wrap on original IR instruction not to be modified during
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ecdb5e6788bc9..451ede50e7cd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -831,7 +831,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
- if (Instruction::isBinaryOp(getOpcode()))
+ if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return vputils::onlyFirstLaneUsed(this);
switch (getOpcode()) {
@@ -993,10 +993,6 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
}
#endif
-bool VPInstructionWithType::onlyFirstLaneUsed(const VPValue *Op) const {
- return vputils::onlyFirstLaneUsed(this);
-}
-
void VPIRInstruction::execute(VPTransformState &State) {
assert((isa<PHINode>(&I) || getNumOperands() == 0) &&
"Only PHINodes can have extra operands");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index e13ed0f6d986e..6d9bcb4ed8cc1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -121,7 +121,6 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
})
: false;
})
-
.Case<VPWidenCastRecipe>([](const auto *R) {
// A cast is uniform according to its operand.
return isUniformAcrossVFsAndUFs(R->getOperand(0));
>From e8623c4541ea0c67aa4a1b6ae2b906da5e0ba044 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 13 Mar 2025 18:42:42 +0000
Subject: [PATCH 3/5] !fixup address latest comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 5 +----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e0ab6d4829f62..f06e0ad8b1e4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -820,7 +820,7 @@ void VPInstruction::execute(VPTransformState &State) {
}
bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
- if (Instruction::isBinaryOp(getOpcode()))
+ if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
return false;
switch (getOpcode()) {
case Instruction::ICmp:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 6d9bcb4ed8cc1..eb70702014405 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -115,10 +115,7 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
})
.Case<VPInstruction>([](const auto *VPI) {
return Instruction::isCast(VPI->getOpcode())
- ? all_of(VPI->operands(),
- [](VPValue *Op) {
- return isUniformAcrossVFsAndUFs(Op);
- })
+ ? all_of(VPI->operands(), isUniformAcrossVFsAndUFs)
: false;
})
.Case<VPWidenCastRecipe>([](const auto *R) {
>From 243fc6f8e98bf1de7d9f5f836f51c553a6ec2daa Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 24 Mar 2025 21:59:27 +0000
Subject: [PATCH 4/5] !fixup address latest comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++------
llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 +++++---------
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 5 ++---
4 files changed, 12 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f3679e44ab816..e2ad39d56f5a0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1030,8 +1030,6 @@ class VPInstructionWithType : public VPInstruction {
/// Scalar result type produced by the recipe.
Type *ResultTy;
- Value *generate(VPTransformState &State);
-
public:
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, DebugLoc DL, const Twine &Name = "")
@@ -1047,16 +1045,16 @@ class VPInstructionWithType : public VPInstruction {
}
VPInstruction *clone() override {
- auto *New =
- new VPInstructionWithType(getOpcode(), {getOperand(0)}, getResultType(),
- getDebugLoc(), getName());
+ SmallVector<VPValue *, 2> Operands(operands());
+ auto *New = new VPInstructionWithType(
+ getOpcode(), Operands, getResultType(), getDebugLoc(), getName());
New->setUnderlyingValue(getUnderlyingValue());
return New;
}
void execute(VPTransformState &State) override;
- /// Return the cost of this VPIRInstruction.
+ /// Return the cost of this VPInstruction.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {
// TODO: Compute accurate cost after retiring the legacy cost model.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index af25dbf99c957..eb3af41c58179 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -259,6 +259,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
+ // VPInstructionWithType must be handled before VPInstruction.
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe>(
[](const auto *R) { return R->getResultType(); })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 89587ac70cf8a..af315229018d1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1039,28 +1039,24 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
#endif
-Value *VPInstructionWithType::generate(VPTransformState &State) {
+void VPInstructionWithType::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
switch (getOpcode()) {
- case Instruction::SExt:
case Instruction::ZExt:
case Instruction::Trunc: {
- // Note: SExt/ZExt not used yet.
Value *Op = State.get(getOperand(0), VPLane(0));
- return State.Builder.CreateCast(Instruction::CastOps(getOpcode()), Op,
- ResultTy);
+ Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()),
+ Op, ResultTy);
+ State.set(this, Cast, VPLane(0));
+ break;
}
default:
llvm_unreachable("opcode not implemented yet");
}
}
-void VPInstructionWithType::execute(VPTransformState &State) {
- State.set(this, generate(State), VPLane(0));
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index e94a40d662d8b..88d2aaa57f94d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -112,9 +112,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
all_of(R->operands(), isUniformAcrossVFsAndUFs);
})
.Case<VPInstruction>([](const auto *VPI) {
- return Instruction::isCast(VPI->getOpcode())
- ? all_of(VPI->operands(), isUniformAcrossVFsAndUFs)
- : false;
+ return Instruction::isCast(VPI->getOpcode()) &&
+ all_of(VPI->operands(), isUniformAcrossVFsAndUFs);
})
.Case<VPWidenCastRecipe>([](const auto *R) {
// A cast is uniform according to its operand.
>From 1749b91ad2242119ad9dce8e20aba38c5a6865f3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 25 Mar 2025 15:05:44 +0000
Subject: [PATCH 5/5] !fixup add VPInstruction::isCast.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +--
llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++++++----
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4a6e03a7342ab..c7bb6ac72d4f3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10410,8 +10410,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
isa<VPDerivedIVRecipe>(U) ||
- Instruction::isCast(
- cast<VPInstruction>(U)->getOpcode()) ||
+ VPInstruction::isCast(U) ||
cast<VPInstruction>(U)->getOpcode() ==
Instruction::Add;
}) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 97c13c47902c8..88d65e982a91e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1021,6 +1021,12 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Returns the symbolic name assigned to the VPInstruction.
StringRef getName() const { return Name; }
+
+ /// Return true if \p U is a cast.
+ static bool isCast(const VPUser *U) {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ return VPI && Instruction::isCast(VPI->getOpcode());
+ }
};
/// A specialization of VPInstruction augmenting it with a dedicated result
@@ -1035,10 +1041,7 @@ class VPInstructionWithType : public VPInstruction {
Type *ResultTy, DebugLoc DL, const Twine &Name = "")
: VPInstruction(Opcode, Operands, DL, Name), ResultTy(ResultTy) {}
- static inline bool classof(const VPRecipeBase *R) {
- auto *VPI = dyn_cast<VPInstruction>(R);
- return VPI && Instruction::isCast(VPI->getOpcode());
- }
+ static inline bool classof(const VPRecipeBase *R) { return isCast(R); }
static inline bool classof(const VPUser *R) {
return isa<VPInstructionWithType>(cast<VPRecipeBase>(R));