[llvm] fdce0ea - [VPlan] Add ExitingIVValue VPInstruction. (#175651)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 6 04:27:36 PST 2026
Author: Florian Hahn
Date: 2026-02-06T12:27:31Z
New Revision: fdce0ea70848ee71a25304b203b5209ba3d11b7b
URL: https://github.com/llvm/llvm-project/commit/fdce0ea70848ee71a25304b203b5209ba3d11b7b
DIFF: https://github.com/llvm/llvm-project/commit/fdce0ea70848ee71a25304b203b5209ba3d11b7b.diff
LOG: [VPlan] Add ExitingIVValue VPInstruction. (#175651)
Add a new VPInstruction opcode, ExitingIVValue, to compute the exiting value
of an induction variable after vectorization. Where applicable, it replaces
the pattern of extracting the last lane of the last part of the induction's
backedge value.
This allows us to always use the pre-computed IV end value. It will also
allow unifying end value creation for both induction resume and exit
values.
PR: https://github.com/llvm/llvm-project/pull/175651
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fba6e9fc702d6..d064a0c66a081 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1262,7 +1262,12 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// Returns the value for vscale.
VScale,
- OpsEnd = VScale,
+ /// Compute the exiting value of a wide induction after vectorization, that
+ /// is the value of the last lane of the induction increment (i.e. its
+ /// backedge value). Takes the wide induction recipe and the original
+ /// backedge value as operands.
+ ExitingIVValue,
+ OpsEnd = ExitingIVValue,
};
/// Returns true if this VPInstruction generates scalar values for all lanes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8fbe7d93e6f45..567db952bec29 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -78,6 +78,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case Instruction::PHI:
case VPInstruction::Broadcast:
case VPInstruction::ComputeReductionResult:
+ case VPInstruction::ExitingIVValue:
case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ExtractLastPart:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 372acefbca4d8..eea9952243cd3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -643,8 +643,31 @@ createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start,
// used, so it is safe.
VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
- IndDesc, Flags, DL);
+ auto *WideIV = new VPWidenIntOrFpInductionRecipe(
+ Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);
+
+ // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
+ // recipes.
+ VPValue *BackedgeVal = PhiR->getOperand(1);
+ for (VPUser *U : to_vector(BackedgeVal->users())) {
+ if (!match(U, m_ExtractLastPart(m_VPValue())))
+ continue;
+ auto *ExtractLastPart = cast<VPInstruction>(U);
+ if (!match(ExtractLastPart->getSingleUser(),
+ m_ExtractLastLane(m_VPValue())))
+ continue;
+ auto *ExtractLastLane =
+ cast<VPInstruction>(ExtractLastPart->getSingleUser());
+ assert(is_contained(ExtractLastLane->getParent()->successors(),
+ Plan.getScalarPreheader()) &&
+ "last lane must be extracted in the middle block");
+ VPBuilder Builder(ExtractLastLane);
+ ExtractLastLane->replaceAllUsesWith(Builder.createNaryOp(
+ VPInstruction::ExitingIVValue, {WideIV, BackedgeVal}));
+ ExtractLastLane->eraseFromParent();
+ ExtractLastPart->eraseFromParent();
+ }
+ return WideIV;
}
void VPlanTransforms::createHeaderPhiRecipes(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index cfb127541bfbd..25b247317a27c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -515,6 +515,12 @@ inline VPInstruction_match<VPInstruction::StepVector> m_StepVector() {
return m_VPInstruction<VPInstruction::StepVector>();
}
+template <typename Op0_t, typename Op1_t>
+inline VPInstruction_match<VPInstruction::ExitingIVValue, Op0_t, Op1_t>
+m_ExitingIVValue(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_VPInstruction<VPInstruction::ExitingIVValue>(Op0, Op1);
+}
+
template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 2ba237c4cd6f8..e797cb091b3c2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -321,7 +321,9 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
VPValue *Op;
- if (!match(&R, m_ExtractLastLane(m_ExtractLastPart(m_VPValue(Op)))))
+ if (!match(&R, m_CombineOr(
+ m_ExitingIVValue(m_VPValue(), m_VPValue(Op)),
+ m_ExtractLastLane(m_ExtractLastPart(m_VPValue(Op))))))
continue;
// Compute the index of the last active lane.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f97bdc072926a..1d0c3d7a61f94 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -470,6 +470,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Store:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnTwoConds:
+ case VPInstruction::ExitingIVValue:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
@@ -1322,6 +1323,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::ExtractLastPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
+ case VPInstruction::ExitingIVValue:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::FirstActiveLane:
case VPInstruction::LastActiveLane:
@@ -1385,6 +1387,7 @@ bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
return false;
case VPInstruction::ComputeAnyOfResult:
return Op == getOperand(0) || Op == getOperand(1);
+ case VPInstruction::ExitingIVValue:
case VPInstruction::ExtractLane:
return Op == getOperand(0);
};
@@ -1470,6 +1473,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
case VPInstruction::BuildVector:
O << "buildvector";
break;
+ case VPInstruction::ExitingIVValue:
+ O << "exiting-iv-value";
+ break;
case VPInstruction::ExtractLane:
O << "extract-lane";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e19c944fe186..2479cb6243b48 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -995,10 +995,14 @@ static VPValue *optimizeLatchExitInductionUser(
VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op,
DenseMap<VPValue *, VPValue *> &EndValues, PredicatedScalarEvolution &PSE) {
VPValue *Incoming;
- if (!match(Op, m_ExtractLastLaneOfLastPart(m_VPValue(Incoming))))
- return nullptr;
+ VPWidenInductionRecipe *WideIV = nullptr;
+ if (match(Op, m_ExitingIVValue(m_VPValue(), m_VPValue(Incoming)))) {
+ WideIV = getOptimizableIVOf(Op->getDefiningRecipe()->getOperand(0), PSE);
+ assert(WideIV && "must have an optimizable IV");
+ } else if (match(Op, m_ExtractLastLaneOfLastPart(m_VPValue(Incoming)))) {
+ WideIV = getOptimizableIVOf(Incoming, PSE);
+ }
- auto *WideIV = getOptimizableIVOf(Incoming, PSE);
if (!WideIV)
return nullptr;
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 62ffd2eb1a6fc..10d045edd3fd2 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -1129,7 +1129,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
; VEC: [[VECTOR_BODY]]:
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
@@ -1138,16 +1137,12 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; VEC: [[MIDDLE_BLOCK]]:
-; VEC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1)
-; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> splat (i32 1), [[TMP8]]
-; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; VEC-NEXT: br label %[[E_EXIT:.*]]
; VEC: [[E_EXIT]]:
-; VEC-NEXT: ret i32 [[TMP7]]
+; VEC-NEXT: ret i32 8
;
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
; INTERLEAVE-SAME: ptr [[DST:%.*]]) {
@@ -1165,15 +1160,13 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
-; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1
-; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 1, [[TMP4]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[MIDDLE_BLOCK]]:
; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]]
; INTERLEAVE: [[E_EXIT]]:
-; INTERLEAVE-NEXT: ret i32 [[TMP5]]
+; INTERLEAVE-NEXT: ret i32 8
;
entry:
%step.1 = sext i8 0 to i32
@@ -1292,13 +1285,11 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i64 0
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 -1
; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2
-; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1
-; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1
+; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; VEC: [[MIDDLE_BLOCK]]:
; VEC-NEXT: br label %[[EXIT:.*]]
; VEC: [[EXIT]]:
-; VEC-NEXT: br label %[[EXIT1:.*]]
-; VEC: [[EXIT1]]:
-; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA1]]
+; VEC-NEXT: ret i64 1
;
; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented(
; INTERLEAVE-SAME: ptr [[DST:%.*]]) {
@@ -1311,13 +1302,11 @@ define i64 @test_iv_increment_incremented(ptr %dst) {
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 2
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2
; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1
-; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP2]], 1
; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; INTERLEAVE: [[MIDDLE_BLOCK]]:
; INTERLEAVE-NEXT: br label %[[EXIT:.*]]
; INTERLEAVE: [[EXIT]]:
-; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA1]]
+; INTERLEAVE-NEXT: ret i64 1
;
entry:
br label %loop
More information about the llvm-commits
mailing list