[llvm] [VPlan] Move VPDerivedIVRecipe::execute to VPlanRecipes (NFC) (PR #176577)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 19 01:18:15 PST 2026
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/176577
>From 63e21f3ebe7ea23a19ff700ce8e73f7cc81d914d Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Sat, 17 Jan 2026 16:13:33 +0000
Subject: [PATCH 1/2] [VPlan] Move VPDerivedIVRecipe::execute to VPlanRecipes (NFC)
---
.../Transforms/Vectorize/LoopVectorize.cpp | 200 ++++++++----------
llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 9 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 17 ++
3 files changed, 115 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0b4546636e584..c77dcc7880947 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -787,6 +787,95 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
return B.CreateElementCount(Ty, VF);
}
+/// FIXME: The newly created binary instructions should contain nsw/nuw
+/// flags, which can be found from the original scalar operations.
+Value *emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
+ Value *Step,
+ InductionDescriptor::InductionKind InductionKind,
+ const BinaryOperator *InductionBinOp) {
+ using namespace llvm::PatternMatch;
+ Type *StepTy = Step->getType();
+ Value *CastedIndex = StepTy->isIntegerTy()
+ ? B.CreateSExtOrTrunc(Index, StepTy)
+ : B.CreateCast(Instruction::SIToFP, Index, StepTy);
+ if (CastedIndex != Index) {
+ CastedIndex->setName(CastedIndex->getName() + ".cast");
+ Index = CastedIndex;
+ }
+
+ // Note: the IR at this point is broken. We cannot use SE to create any new
+ // SCEV and then expand it, hoping that SCEV's simplification will give us
+ // a more optimal code. Unfortunately, attempt of doing so on invalid IR may
+ // lead to various SCEV crashes. So all we can do is to use builder and rely
+ // on InstCombine for future simplifications. Here we handle some trivial
+ // cases only.
+ auto CreateAdd = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (match(X, m_ZeroInt()))
+ return Y;
+ if (match(Y, m_ZeroInt()))
+ return X;
+ return B.CreateAdd(X, Y);
+ };
+
+ // We allow X to be a vector type, in which case Y will potentially be
+ // splatted into a vector with the same element count.
+ auto CreateMul = [&B](Value *X, Value *Y) {
+ assert(X->getType()->getScalarType() == Y->getType() &&
+ "Types don't match!");
+ if (match(X, m_One()))
+ return Y;
+ if (match(Y, m_One()))
+ return X;
+ VectorType *XVTy = dyn_cast<VectorType>(X->getType());
+ if (XVTy && !isa<VectorType>(Y->getType()))
+ Y = B.CreateVectorSplat(XVTy->getElementCount(), Y);
+ return B.CreateMul(X, Y);
+ };
+
+ switch (InductionKind) {
+ case InductionDescriptor::IK_IntInduction: {
+ assert(!isa<VectorType>(Index->getType()) &&
+ "Vector indices not supported for integer inductions yet");
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (isa<ConstantInt>(Step) && cast<ConstantInt>(Step)->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ auto *Offset = CreateMul(Index, Step);
+ return CreateAdd(StartValue, Offset);
+ }
+ case InductionDescriptor::IK_PtrInduction:
+ return B.CreatePtrAdd(StartValue, CreateMul(Index, Step));
+ case InductionDescriptor::IK_FpInduction: {
+ assert(!isa<VectorType>(Index->getType()) &&
+ "Vector indices not supported for FP inductions yet");
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *MulExp = B.CreateFMul(Step, Index);
+ return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
+ "induction");
+ }
+ case InductionDescriptor::IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+static std::optional<unsigned> getMaxVScale(const Function &F,
+ const TargetTransformInfo &TTI) {
+ if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
+ return MaxVScale;
+
+ if (F.hasFnAttribute(Attribute::VScaleRange))
+ return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
+
+ return std::nullopt;
+}
+
void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
@@ -2156,100 +2245,6 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//
-/// Compute the transformed value of Index at offset StartValue using step
-/// StepValue.
-/// For integer induction, returns StartValue + Index * StepValue.
-/// For pointer induction, returns StartValue[Index * StepValue].
-/// FIXME: The newly created binary instructions should contain nsw/nuw
-/// flags, which can be found from the original scalar operations.
-static Value *
-emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
- Value *Step,
- InductionDescriptor::InductionKind InductionKind,
- const BinaryOperator *InductionBinOp) {
- using namespace llvm::PatternMatch;
- Type *StepTy = Step->getType();
- Value *CastedIndex = StepTy->isIntegerTy()
- ? B.CreateSExtOrTrunc(Index, StepTy)
- : B.CreateCast(Instruction::SIToFP, Index, StepTy);
- if (CastedIndex != Index) {
- CastedIndex->setName(CastedIndex->getName() + ".cast");
- Index = CastedIndex;
- }
-
- // Note: the IR at this point is broken. We cannot use SE to create any new
- // SCEV and then expand it, hoping that SCEV's simplification will give us
- // a more optimal code. Unfortunately, attempt of doing so on invalid IR may
- // lead to various SCEV crashes. So all we can do is to use builder and rely
- // on InstCombine for future simplifications. Here we handle some trivial
- // cases only.
- auto CreateAdd = [&B](Value *X, Value *Y) {
- assert(X->getType() == Y->getType() && "Types don't match!");
- if (match(X, m_ZeroInt()))
- return Y;
- if (match(Y, m_ZeroInt()))
- return X;
- return B.CreateAdd(X, Y);
- };
-
- // We allow X to be a vector type, in which case Y will potentially be
- // splatted into a vector with the same element count.
- auto CreateMul = [&B](Value *X, Value *Y) {
- assert(X->getType()->getScalarType() == Y->getType() &&
- "Types don't match!");
- if (match(X, m_One()))
- return Y;
- if (match(Y, m_One()))
- return X;
- VectorType *XVTy = dyn_cast<VectorType>(X->getType());
- if (XVTy && !isa<VectorType>(Y->getType()))
- Y = B.CreateVectorSplat(XVTy->getElementCount(), Y);
- return B.CreateMul(X, Y);
- };
-
- switch (InductionKind) {
- case InductionDescriptor::IK_IntInduction: {
- assert(!isa<VectorType>(Index->getType()) &&
- "Vector indices not supported for integer inductions yet");
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
- if (isa<ConstantInt>(Step) && cast<ConstantInt>(Step)->isMinusOne())
- return B.CreateSub(StartValue, Index);
- auto *Offset = CreateMul(Index, Step);
- return CreateAdd(StartValue, Offset);
- }
- case InductionDescriptor::IK_PtrInduction:
- return B.CreatePtrAdd(StartValue, CreateMul(Index, Step));
- case InductionDescriptor::IK_FpInduction: {
- assert(!isa<VectorType>(Index->getType()) &&
- "Vector indices not supported for FP inductions yet");
- assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- assert(InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub) &&
- "Original bin op should be defined for FP induction");
-
- Value *MulExp = B.CreateFMul(Step, Index);
- return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
- "induction");
- }
- case InductionDescriptor::IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
-}
-
-static std::optional<unsigned> getMaxVScale(const Function &F,
- const TargetTransformInfo &TTI) {
- if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
- return MaxVScale;
-
- if (F.hasFnAttribute(Attribute::VScaleRange))
- return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
-
- return std::nullopt;
-}
-
/// For the given VF and UF and maximum trip count computed for the loop, return
/// whether the induction variable might overflow in the vectorized loop. If not,
/// then we know a runtime overflow check always evaluates to false and can be
@@ -8967,23 +8962,6 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), PSE);
}
-void VPDerivedIVRecipe::execute(VPTransformState &State) {
- assert(!State.Lane && "VPDerivedIVRecipe being replicated.");
-
- // Fast-math-flags propagate from the original induction instruction.
- IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
- if (FPBinOp)
- State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());
-
- Value *Step = State.get(getStepValue(), VPLane(0));
- Value *Index = State.get(getOperand(1), VPLane(0));
- Value *DerivedIV = emitTransformedIndex(
- State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind,
- cast_if_present<BinaryOperator>(FPBinOp));
- DerivedIV->setName(Name);
- State.set(this, DerivedIV, VPLane(0));
-}
-
// Determine how to lower the scalar epilogue, which depends on 1) optimising
// for minimum code-size, 2) predicate compiler options, 3) loop hints forcing
// predication, and 4) a TTI hook that analyses whether the loop is suitable
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 973cbaa4944d6..334bf9d0620ca 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -50,6 +50,15 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
+/// Compute the transformed value of Index at offset StartValue using step
+/// StepValue.
+/// For integer induction, returns StartValue + Index * StepValue.
+/// For pointer induction, returns StartValue[Index * StepValue].
+Value *emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
+ Value *Step,
+ InductionDescriptor::InductionKind InductionKind,
+ const BinaryOperator *InductionBinOp);
+
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
/// [1, 16) = {1, 2, 4, 8}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b10dd17fbfc89..5ef0c0d195eb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2448,6 +2448,23 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
getScalarType() == getRegion()->getCanonicalIVType();
}
+void VPDerivedIVRecipe::execute(VPTransformState &State) {
+ assert(!State.Lane && "VPDerivedIVRecipe being replicated.");
+
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
+ if (FPBinOp)
+ State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());
+
+ Value *Step = State.get(getStepValue(), VPLane(0));
+ Value *Index = State.get(getOperand(1), VPLane(0));
+ Value *DerivedIV = emitTransformedIndex(
+ State.Builder, Index, getStartValue()->getLiveInIRValue(), Step, Kind,
+ cast_if_present<BinaryOperator>(FPBinOp));
+ DerivedIV->setName(Name);
+ State.set(this, DerivedIV, VPLane(0));
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
>From 86e346979b9f00068e0b5569943323535b96f99f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Mon, 19 Jan 2026 09:01:35 +0000
Subject: [PATCH 2/2] [LV] Revert some moves
---
.../Transforms/Vectorize/LoopVectorize.cpp | 179 +++++++++---------
1 file changed, 90 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c77dcc7880947..a01c268dec18b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -787,95 +787,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
return B.CreateElementCount(Ty, VF);
}
-/// FIXME: The newly created binary instructions should contain nsw/nuw
-/// flags, which can be found from the original scalar operations.
-Value *emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
- Value *Step,
- InductionDescriptor::InductionKind InductionKind,
- const BinaryOperator *InductionBinOp) {
- using namespace llvm::PatternMatch;
- Type *StepTy = Step->getType();
- Value *CastedIndex = StepTy->isIntegerTy()
- ? B.CreateSExtOrTrunc(Index, StepTy)
- : B.CreateCast(Instruction::SIToFP, Index, StepTy);
- if (CastedIndex != Index) {
- CastedIndex->setName(CastedIndex->getName() + ".cast");
- Index = CastedIndex;
- }
-
- // Note: the IR at this point is broken. We cannot use SE to create any new
- // SCEV and then expand it, hoping that SCEV's simplification will give us
- // a more optimal code. Unfortunately, attempt of doing so on invalid IR may
- // lead to various SCEV crashes. So all we can do is to use builder and rely
- // on InstCombine for future simplifications. Here we handle some trivial
- // cases only.
- auto CreateAdd = [&B](Value *X, Value *Y) {
- assert(X->getType() == Y->getType() && "Types don't match!");
- if (match(X, m_ZeroInt()))
- return Y;
- if (match(Y, m_ZeroInt()))
- return X;
- return B.CreateAdd(X, Y);
- };
-
- // We allow X to be a vector type, in which case Y will potentially be
- // splatted into a vector with the same element count.
- auto CreateMul = [&B](Value *X, Value *Y) {
- assert(X->getType()->getScalarType() == Y->getType() &&
- "Types don't match!");
- if (match(X, m_One()))
- return Y;
- if (match(Y, m_One()))
- return X;
- VectorType *XVTy = dyn_cast<VectorType>(X->getType());
- if (XVTy && !isa<VectorType>(Y->getType()))
- Y = B.CreateVectorSplat(XVTy->getElementCount(), Y);
- return B.CreateMul(X, Y);
- };
-
- switch (InductionKind) {
- case InductionDescriptor::IK_IntInduction: {
- assert(!isa<VectorType>(Index->getType()) &&
- "Vector indices not supported for integer inductions yet");
- assert(Index->getType() == StartValue->getType() &&
- "Index type does not match StartValue type");
- if (isa<ConstantInt>(Step) && cast<ConstantInt>(Step)->isMinusOne())
- return B.CreateSub(StartValue, Index);
- auto *Offset = CreateMul(Index, Step);
- return CreateAdd(StartValue, Offset);
- }
- case InductionDescriptor::IK_PtrInduction:
- return B.CreatePtrAdd(StartValue, CreateMul(Index, Step));
- case InductionDescriptor::IK_FpInduction: {
- assert(!isa<VectorType>(Index->getType()) &&
- "Vector indices not supported for FP inductions yet");
- assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
- assert(InductionBinOp &&
- (InductionBinOp->getOpcode() == Instruction::FAdd ||
- InductionBinOp->getOpcode() == Instruction::FSub) &&
- "Original bin op should be defined for FP induction");
-
- Value *MulExp = B.CreateFMul(Step, Index);
- return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
- "induction");
- }
- case InductionDescriptor::IK_NoInduction:
- return nullptr;
- }
- llvm_unreachable("invalid enum");
-}
-
-static std::optional<unsigned> getMaxVScale(const Function &F,
- const TargetTransformInfo &TTI) {
- if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
- return MaxVScale;
-
- if (F.hasFnAttribute(Attribute::VScaleRange))
- return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
-
- return std::nullopt;
-}
-
void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
@@ -2245,6 +2156,96 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//
+/// FIXME: The newly created binary instructions should contain nsw/nuw
+/// flags, which can be found from the original scalar operations.
+Value *
+llvm::emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
+ Value *Step,
+ InductionDescriptor::InductionKind InductionKind,
+ const BinaryOperator *InductionBinOp) {
+ using namespace llvm::PatternMatch;
+ Type *StepTy = Step->getType();
+ Value *CastedIndex = StepTy->isIntegerTy()
+ ? B.CreateSExtOrTrunc(Index, StepTy)
+ : B.CreateCast(Instruction::SIToFP, Index, StepTy);
+ if (CastedIndex != Index) {
+ CastedIndex->setName(CastedIndex->getName() + ".cast");
+ Index = CastedIndex;
+ }
+
+ // Note: the IR at this point is broken. We cannot use SE to create any new
+ // SCEV and then expand it, hoping that SCEV's simplification will give us
+ // a more optimal code. Unfortunately, attempt of doing so on invalid IR may
+ // lead to various SCEV crashes. So all we can do is to use builder and rely
+ // on InstCombine for future simplifications. Here we handle some trivial
+ // cases only.
+ auto CreateAdd = [&B](Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Types don't match!");
+ if (match(X, m_ZeroInt()))
+ return Y;
+ if (match(Y, m_ZeroInt()))
+ return X;
+ return B.CreateAdd(X, Y);
+ };
+
+ // We allow X to be a vector type, in which case Y will potentially be
+ // splatted into a vector with the same element count.
+ auto CreateMul = [&B](Value *X, Value *Y) {
+ assert(X->getType()->getScalarType() == Y->getType() &&
+ "Types don't match!");
+ if (match(X, m_One()))
+ return Y;
+ if (match(Y, m_One()))
+ return X;
+ VectorType *XVTy = dyn_cast<VectorType>(X->getType());
+ if (XVTy && !isa<VectorType>(Y->getType()))
+ Y = B.CreateVectorSplat(XVTy->getElementCount(), Y);
+ return B.CreateMul(X, Y);
+ };
+
+ switch (InductionKind) {
+ case InductionDescriptor::IK_IntInduction: {
+ assert(!isa<VectorType>(Index->getType()) &&
+ "Vector indices not supported for integer inductions yet");
+ assert(Index->getType() == StartValue->getType() &&
+ "Index type does not match StartValue type");
+ if (isa<ConstantInt>(Step) && cast<ConstantInt>(Step)->isMinusOne())
+ return B.CreateSub(StartValue, Index);
+ auto *Offset = CreateMul(Index, Step);
+ return CreateAdd(StartValue, Offset);
+ }
+ case InductionDescriptor::IK_PtrInduction:
+ return B.CreatePtrAdd(StartValue, CreateMul(Index, Step));
+ case InductionDescriptor::IK_FpInduction: {
+ assert(!isa<VectorType>(Index->getType()) &&
+ "Vector indices not supported for FP inductions yet");
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *MulExp = B.CreateFMul(Step, Index);
+ return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
+ "induction");
+ }
+ case InductionDescriptor::IK_NoInduction:
+ return nullptr;
+ }
+ llvm_unreachable("invalid enum");
+}
+
+static std::optional<unsigned> getMaxVScale(const Function &F,
+ const TargetTransformInfo &TTI) {
+ if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
+ return MaxVScale;
+
+ if (F.hasFnAttribute(Attribute::VScaleRange))
+ return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
+
+ return std::nullopt;
+}
+
/// For the given VF and UF and maximum trip count computed for the loop, return
/// whether the induction variable might overflow in the vectorized loop. If not,
/// then we know a runtime overflow check always evaluates to false and can be
More information about the llvm-commits mailing list