[llvm] [VPlan] Improve code in VPInstruction::generate (NFC) (PR #169470)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 25 04:34:40 PST 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/169470
From 089d66fb9aebac5bbf4b499d47447bb0a06c930d Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 25 Nov 2025 08:13:01 +0000
Subject: [PATCH 1/2] [VPlan] Improve code in VPInstruction::generate (NFC)
---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 48 ++++++++-----------
1 file changed, 21 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 54fdec3bcf4a1..e73967e1e96dc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -722,10 +722,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
Name);
- auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
- auto PredTy = VectorType::get(
- Int1Ty, State.VF * cast<ConstantInt>(getOperand(2)->getLiveInIRValue())
- ->getZExtValue());
+ ElementCount EC = State.VF.multiplyCoefficientBy(
+ cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
+ auto *PredTy = VectorType::get(Builder.getInt1Ty(), EC);
return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
{PredTy, ScalarTC->getType()},
{VIVElem0, ScalarTC}, nullptr, Name);
@@ -755,7 +754,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
Value *Sub = Builder.CreateSub(ScalarTC, Step);
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
- Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
+ Value *Zero = ConstantInt::getNullValue(ScalarTC->getType());
return Builder.CreateSelect(Cmp, Sub, Zero);
}
case VPInstruction::ExplicitVectorLength: {
@@ -767,11 +766,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
"Requested vector length should be an integer.");
assert(State.VF.isScalable() && "Expected scalable vector factor.");
- Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());
+ Value *VFArg = Builder.getInt32(State.VF.getKnownMinValue());
- Value *EVL = State.Builder.CreateIntrinsic(
- State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
- {AVL, VFArg, State.Builder.getTrue()});
+ Value *EVL = Builder.CreateIntrinsic(
+ Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
+ {AVL, VFArg, Builder.getTrue()});
return EVL;
}
case VPInstruction::CanonicalIVIncrementForPart: {
@@ -808,8 +807,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
cast<StructType>(State.TypeAnalysis.inferScalarType(getOperand(0)));
Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));
for (const auto &[LaneIndex, Op] : enumerate(operands())) {
- for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
- FieldIndex++) {
+ for (unsigned FieldIndex : seq<unsigned>(StructTy->getNumElements())) {
Value *ScalarValue =
Builder.CreateExtractValue(State.get(Op, true), FieldIndex);
Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);
@@ -825,8 +823,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
auto NumOfElements = ElementCount::getFixed(getNumOperands());
Value *Res = PoisonValue::get(toVectorizedTy(ScalarTy, NumOfElements));
for (const auto &[Idx, Op] : enumerate(operands()))
- Res = State.Builder.CreateInsertElement(Res, State.get(Op, true),
- State.Builder.getInt32(Idx));
+ Res = Builder.CreateInsertElement(Res, State.get(Op, true),
+ Builder.getInt32(Idx));
return Res;
}
case VPInstruction::ReductionStartVector: {
@@ -839,9 +837,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
ElementCount VF = State.VF.divideCoefficientBy(
cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
- Constant *Zero = Builder.getInt32(0);
return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
- Zero);
+ Builder.getInt32(0));
}
case VPInstruction::ComputeAnyOfResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
@@ -849,7 +846,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
Value *ReducedPartRdx = State.get(getOperand(2));
- for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
+ for (unsigned Idx : seq<unsigned>(3, getNumOperands()))
ReducedPartRdx =
Builder.CreateBinOp(Instruction::Or, State.get(getOperand(Idx)),
ReducedPartRdx, "bin.rdx");
@@ -877,7 +874,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
else
MinMaxKind = IsSigned ? RecurKind::SMin : RecurKind::UMin;
- for (unsigned Part = 1; Part < UF; ++Part)
+ for (unsigned Part : seq<unsigned>(1, UF))
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
State.get(getOperand(3 + Part)));
@@ -900,7 +897,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// each part of the reduction.
unsigned UF = getNumOperands() - 1;
VectorParts RdxParts(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
+ for (unsigned Part : seq<unsigned>(UF))
RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
@@ -918,14 +915,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
else {
- Instruction::BinaryOps Opcode;
// For sub-recurrences, each UF's reduction variable is already
// negative, we need to do: reduce.add(-acc_uf0 + -acc_uf1)
- if (RK == RecurKind::Sub)
- Opcode = Instruction::Add;
- else
- Opcode =
- (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK);
+ Instruction::BinaryOps Opcode =
+ RK == RecurKind::Sub
+ ? Instruction::Add
+ : (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK);
ReducedPartRdx =
Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");
}
@@ -990,7 +985,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *LaneToExtract = State.get(getOperand(0), true);
Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
Value *Res = nullptr;
- Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
+ Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
Value *VectorStart =
@@ -1020,8 +1015,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// If there are multiple operands, create a chain of selects to pick the
// first operand with an active lane and add the number of lanes of the
// preceding operands.
- Value *RuntimeVF =
- getRuntimeVF(State.Builder, State.Builder.getInt64Ty(), State.VF);
+ Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt64Ty(), State.VF);
unsigned LastOpIdx = getNumOperands() - 1;
Value *Res = nullptr;
for (int Idx = LastOpIdx; Idx >= 0; --Idx) {
From edbe1024272075ac46d6294904bb23ea82b2641f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 25 Nov 2025 12:31:09 +0000
Subject: [PATCH 2/2] [VPlan] Revert seq changes
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e73967e1e96dc..e0ef3d5f41b6a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -807,7 +807,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
cast<StructType>(State.TypeAnalysis.inferScalarType(getOperand(0)));
Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));
for (const auto &[LaneIndex, Op] : enumerate(operands())) {
- for (unsigned FieldIndex : seq<unsigned>(StructTy->getNumElements())) {
+ for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();
+ FieldIndex++) {
Value *ScalarValue =
Builder.CreateExtractValue(State.get(Op, true), FieldIndex);
Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);
@@ -846,7 +847,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
Value *ReducedPartRdx = State.get(getOperand(2));
- for (unsigned Idx : seq<unsigned>(3, getNumOperands()))
+ for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
ReducedPartRdx =
Builder.CreateBinOp(Instruction::Or, State.get(getOperand(Idx)),
ReducedPartRdx, "bin.rdx");
@@ -874,7 +875,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
else
MinMaxKind = IsSigned ? RecurKind::SMin : RecurKind::UMin;
- for (unsigned Part : seq<unsigned>(1, UF))
+ for (unsigned Part = 1; Part < UF; ++Part)
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
State.get(getOperand(3 + Part)));
@@ -897,7 +898,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// each part of the reduction.
unsigned UF = getNumOperands() - 1;
VectorParts RdxParts(UF);
- for (unsigned Part : seq<unsigned>(UF))
+ for (unsigned Part = 0; Part < UF; ++Part)
RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
More information about the llvm-commits mailing list