[llvm] [VPlan] Replace ExtractLast(Elem|LanePerPart) with ExtractLast(Lane/Part) (PR #164124)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 7 06:39:06 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/164124
>From dbb1410106dc227ddfd106a72091ea9161ecb6c3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 1 Dec 2025 10:35:38 +0000
Subject: [PATCH 1/4] [VPlan] Replace ExtractLast(Elem|LanePerPart) with
ExtractLast(Lane/Part)
---
.../Transforms/Vectorize/LoopVectorize.cpp | 15 +++-
llvm/lib/Transforms/Vectorize/VPlan.h | 18 +++--
.../Transforms/Vectorize/VPlanAnalysis.cpp | 17 +++--
.../Vectorize/VPlanConstruction.cpp | 2 +-
.../Transforms/Vectorize/VPlanPatternMatch.h | 20 ++++--
.../Transforms/Vectorize/VPlanPredicator.cpp | 8 +--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 30 ++++----
.../Transforms/Vectorize/VPlanTransforms.cpp | 71 +++++++++----------
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 30 ++++----
.../first-order-recurrence-chains-vplan.ll | 27 ++++---
.../interleave-and-scalarize-only.ll | 3 +-
.../vplan-printing-reductions.ll | 3 +-
.../LoopVectorize/vplan-printing.ll | 9 ++-
13 files changed, 141 insertions(+), 112 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4a89f7dd8672e..904ce75c69f4f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8963,14 +8963,23 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
// Update all users outside the vector region. Also replace redundant
- // ExtractLastElement.
+ // extracts.
for (auto *U : to_vector(OrigExitingVPV->users())) {
auto *Parent = cast<VPRecipeBase>(U)->getParent();
if (FinalReductionResult == U || Parent->getParent())
continue;
U->replaceUsesOfWith(OrigExitingVPV, FinalReductionResult);
- if (match(U, m_CombineOr(m_ExtractLastElement(m_VPValue()),
- m_ExtractLane(m_VPValue(), m_VPValue()))))
+
+ // Look through ExtractLastPart.
+ if (match(U, m_ExtractLastPart(m_VPValue()))) {
+ auto *ExtractPart = cast<VPInstruction>(U);
+ if (ExtractPart->getNumUsers() != 1)
+ continue;
+ U = *ExtractPart->user_begin();
+ }
+
+ if (match(U, m_CombineOr(m_ExtractLane(m_VPValue(), m_VPValue()),
+ m_ExtractLastLane(m_VPValue()))))
cast<VPInstruction>(U)->replaceAllUsesWith(FinalReductionResult);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6ca750fc53279..6dbc56f1edf19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1069,12 +1069,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ComputeAnyOfResult,
ComputeFindIVResult,
ComputeReductionResult,
- // Extracts the last lane from its operand if it is a vector, or the last
- // part if scalar. In the latter case, the recipe will be removed during
- // unrolling.
- ExtractLastElement,
- // Extracts the last lane for each part from its operand.
- ExtractLastLanePerPart,
+ // Extracts the last part of its operand. Removed during unrolling.
+ ExtractLastPart,
+ // Extracts the last lane of its vector operand, per part.
+ ExtractLastLane,
// Extracts the second-to-last lane from its operand or the second-to-last
// part if it is scalar. In the latter case, the recipe will be removed
// during unrolling.
@@ -1461,10 +1459,10 @@ class VPIRInstruction : public VPRecipeBase {
return true;
}
- /// Update the recipes first operand to the last lane of the operand using \p
- /// Builder. Must only be used for VPIRInstructions with at least one operand
- /// wrapping a PHINode.
- void extractLastLaneOfFirstOperand(VPBuilder &Builder);
+ /// Update the recipe's first operand to the final lane of the operand using
+ /// \p Builder. Must only be used for VPIRInstructions with at least one
+ /// operand wrapping a PHINode.
+ void extractFinalLaneOfFirstOperand(VPBuilder &Builder);
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ea38a8b16ebc7..c82322e304e38 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -119,14 +119,18 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::FirstActiveLane:
case VPInstruction::LastActiveLane:
return Type::getIntNTy(Ctx, 64);
- case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractLastLanePerPart:
+ case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractPenultimateElement: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
return VecTy->getElementType();
return BaseTy;
}
+ case VPInstruction::ExtractLastPart: {
+ // Element type of ExtractLastPart is equal to the element type of its
+ // operand.
+ return inferScalarType(R->getOperand(0));
+ }
case VPInstruction::LogicalAnd:
assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
@@ -540,11 +544,14 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
SmallMapVector<unsigned, unsigned, 4> RegUsage;
for (auto *VPV : OpenIntervals) {
- // Skip values that weren't present in the original loop.
- // TODO: Remove after removing the legacy
+ // Skip artificial values or values that weren't present in the original
+ // loop.
+ // TODO: Remove skipping values that weren't present in the original
+ // loop after removing the legacy
// LoopVectorizationCostModel::calculateRegisterUsage
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
- VPBranchOnMaskRecipe>(VPV))
+ VPBranchOnMaskRecipe>(VPV) ||
+ match(VPV, m_ExtractLastPart(m_VPValue())))
continue;
if (VFs[J].isScalar() ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 329b62cee4fce..254baeffbfd85 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -499,7 +499,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
"exit values from early exits must be fixed when branch to "
"early-exit is added");
- ExitIRI->extractLastLaneOfFirstOperand(B);
+ ExitIRI->extractFinalLaneOfFirstOperand(B);
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 750ef8edd94bb..4a233f7935f90 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -387,9 +387,9 @@ m_EVL(const Op0_t &Op0) {
}
template <typename Op0_t>
-inline VPInstruction_match<VPInstruction::ExtractLastElement, Op0_t>
-m_ExtractLastElement(const Op0_t &Op0) {
- return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
+inline VPInstruction_match<VPInstruction::ExtractLastLane, Op0_t>
+m_ExtractLastLane(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastLane>(Op0);
}
template <typename Op0_t, typename Op1_t>
@@ -405,9 +405,17 @@ m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t>
-inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
-m_ExtractLastLanePerPart(const Op0_t &Op0) {
- return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0);
+inline VPInstruction_match<VPInstruction::ExtractLastPart, Op0_t>
+m_ExtractLastPart(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastPart>(Op0);
+}
+
+template <typename Op0_t>
+inline VPInstruction_match<
+ VPInstruction::ExtractLastLane,
+ VPInstruction_match<VPInstruction::ExtractLastPart, Op0_t>>
+m_ExtractFinalLane(const Op0_t &Op0) {
+ return m_ExtractLastLane(m_ExtractLastPart(Op0));
}
template <typename Op0_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 3579af21d8b07..4c91a30619990 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -314,11 +314,9 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
"the exit block must have middle block as single predecessor");
VPBuilder B(Plan.getMiddleBlock()->getTerminator());
- for (auto &P : EB->phis()) {
- auto *ExitIRI = cast<VPIRPhi>(&P);
- VPValue *Inc = ExitIRI->getIncomingValue(0);
+ for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
VPValue *Op;
- if (!match(Inc, m_ExtractLastElement(m_VPValue(Op))))
+ if (!match(&R, m_ExtractLastLane(m_ExtractLastPart(m_VPValue(Op)))))
continue;
// Compute the index of the last active lane.
@@ -327,7 +325,7 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
B.createNaryOp(VPInstruction::LastActiveLane, HeaderMask);
auto *Ext =
B.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
- Inc->replaceAllUsesWith(Ext);
+ R.getVPSingleValue()->replaceAllUsesWith(Ext);
}
}
return Predicator.getBlockMaskCache();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0baf7172e4443..29c9f06970380 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -437,8 +437,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
- case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractLastLanePerPart:
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractLastPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::Not:
case VPInstruction::ResumeForEpilogue:
@@ -815,8 +815,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
return ReducedPartRdx;
}
- case VPInstruction::ExtractLastLanePerPart:
- case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractPenultimateElement: {
unsigned Offset =
getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;
@@ -827,6 +826,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Extract lane VF - Offset from the operand.
Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
} else {
+ // TODO: Remove ExtractLastLane for scalar VFs.
assert(Offset <= 1 && "invalid offset to extract from");
Res = State.get(getOperand(0));
}
@@ -1107,7 +1107,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
I32Ty, {Arg0Ty, I32Ty, I1Ty});
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
}
- case VPInstruction::ExtractLastElement: {
+ case VPInstruction::ExtractLastLane: {
// Add on the cost of extracting the element.
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
@@ -1127,8 +1127,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
bool VPInstruction::isVectorToScalar() const {
- return getOpcode() == VPInstruction::ExtractLastElement ||
- getOpcode() == VPInstruction::ExtractLastLanePerPart ||
+ return getOpcode() == VPInstruction::ExtractLastLane ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::ExtractLane ||
@@ -1195,8 +1194,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExtractLane:
- case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractLastLanePerPart:
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractLastPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::ExplicitVectorLength:
@@ -1345,11 +1344,11 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractLane:
O << "extract-lane";
break;
- case VPInstruction::ExtractLastElement:
- O << "extract-last-element";
+ case VPInstruction::ExtractLastLane:
+ O << "extract-last-lane";
break;
- case VPInstruction::ExtractLastLanePerPart:
- O << "extract-last-lane-per-part";
+ case VPInstruction::ExtractLastPart:
+ O << "extract-last-part";
break;
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
@@ -1502,7 +1501,7 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF,
return 0;
}
-void VPIRInstruction::extractLastLaneOfFirstOperand(VPBuilder &Builder) {
+void VPIRInstruction::extractFinalLaneOfFirstOperand(VPBuilder &Builder) {
assert(isa<PHINode>(getInstruction()) &&
"can only update exiting operands to phi nodes");
assert(getNumOperands() > 0 && "must have at least one operand");
@@ -1510,7 +1509,8 @@ void VPIRInstruction::extractLastLaneOfFirstOperand(VPBuilder &Builder) {
if (Exiting->isLiveIn())
return;
- Exiting = Builder.createNaryOp(VPInstruction::ExtractLastElement, {Exiting});
+ Exiting = Builder.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
+ Exiting = Builder.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
setOperand(0, Exiting);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f7281283bae81..8bfd491ea6df2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -923,7 +923,7 @@ static VPValue *optimizeLatchExitInductionUser(
VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op,
DenseMap<VPValue *, VPValue *> &EndValues, ScalarEvolution &SE) {
VPValue *Incoming;
- if (!match(Op, m_ExtractLastElement(m_VPValue(Incoming))))
+ if (!match(Op, m_ExtractFinalLane(m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -1361,9 +1361,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
- // Look through ExtractLastElement (BuildVector ....).
- if (match(Def, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
- m_ExtractLastLanePerPart(m_BuildVector())))) {
+ // Look through ExtractLastLane (BuildVector ....).
+ if (match(Def, m_ExtractLastLane(m_BuildVector()))) {
auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1451,15 +1450,12 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
- if (match(Def,
- m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
- m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
+ if (match(Def, m_ExtractLastLane(m_Broadcast(m_VPValue(A))))) {
Def->replaceAllUsesWith(A);
return;
}
- if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
- m_ExtractLastLanePerPart(m_VPValue(A)))) &&
+ if (match(Def, m_ExtractLastLane(m_VPValue(A))) &&
((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
(isa<VPReplicateRecipe>(A) &&
cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1468,11 +1464,8 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(A);
}
- if (Plan->getUF() == 1 &&
- match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
- return Def->replaceAllUsesWith(
- Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
- }
+ if (Plan->getUF() == 1 && match(Def, m_ExtractLastPart(m_VPValue(A))))
+ return Def->replaceAllUsesWith(A);
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1512,13 +1505,14 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Flags*/,
*RepR /*Metadata*/, RepR->getDebugLoc());
Clone->insertBefore(RepOrWidenR);
- unsigned ExtractOpc =
- vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
- ? VPInstruction::ExtractLastElement
- : VPInstruction::ExtractLastLanePerPart;
- auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
- Ext->insertBefore(Clone);
- Clone->setOperand(0, Ext);
+ VPBuilder Builder(Clone);
+ VPValue *ExtractOp = Clone->getOperand(0);
+ if (vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)))
+ ExtractOp =
+ Builder.createNaryOp(VPInstruction::ExtractLastPart, ExtractOp);
+ ExtractOp =
+ Builder.createNaryOp(VPInstruction::ExtractLastLane, ExtractOp);
+ Clone->setOperand(0, ExtractOp);
RepR->eraseFromParent();
continue;
}
@@ -1536,8 +1530,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
[RepOrWidenR](const VPUser *U) {
if (auto *VPI = dyn_cast<VPInstruction>(U)) {
unsigned Opcode = VPI->getOpcode();
- if (Opcode == VPInstruction::ExtractLastElement ||
- Opcode == VPInstruction::ExtractLastLanePerPart ||
+ if (Opcode == VPInstruction::ExtractLastLane ||
+ Opcode == VPInstruction::ExtractLastPart ||
Opcode == VPInstruction::ExtractPenultimateElement)
return true;
}
@@ -2208,7 +2202,8 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
B.createNaryOp(VPInstruction::ExtractLane,
{PenultimateIndex, FOR->getBackedgeValue()});
VPValue *LastPrevIter =
- B.createNaryOp(VPInstruction::ExtractLastElement, FOR);
+ B.createNaryOp(VPInstruction::ExtractLastLane, FOR);
+
VPValue *Cmp = B.createICmp(CmpInst::ICMP_EQ, LastActiveLane, Zero);
VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
cast<VPInstruction>(U)->replaceAllUsesWith(Sel);
@@ -3739,8 +3734,8 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
- // predecessor. Extract its last lane.
- ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
+ // predecessor. Extract its final lane.
+ ExitIRI->extractFinalLaneOfFirstOperand(MiddleBuilder);
}
VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
@@ -4867,10 +4862,13 @@ void VPlanTransforms::updateScalarResumePhis(
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
"Cannot handle loops with uncountable early exits");
- if (IsFOR)
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
- "vector.recur.extract");
+ if (IsFOR) {
+ auto *ExtractPart = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastPart, ResumeFromVectorLoop);
+ ResumeFromVectorLoop =
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractLastLane,
+ ExtractPart, {}, "vector.recur.extract");
+ }
ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
ResumePhiR->setOperand(0, ResumeFromVectorLoop);
}
@@ -4966,10 +4964,11 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
// the VPIRInstruction modeling the phi.
- for (VPUser *U : FOR->users()) {
- using namespace llvm::VPlanPatternMatch;
- if (!match(U, m_ExtractLastElement(m_Specific(FOR))))
+ for (VPRecipeBase &R : make_early_inc_range(
+ make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
+ if (!match(&R, m_ExtractFinalLane(m_Specific(FOR))))
continue;
+
// For VF vscale x 1, if vscale = 1, we are unable to extract the
// penultimate value of the recurrence. Instead we rely on the existing
// extract of the last element from the result of
@@ -4979,9 +4978,9 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
Range))
return;
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
- {}, "vector.recur.extract.for.phi");
- cast<VPInstruction>(U)->replaceAllUsesWith(PenultimateElement);
+ VPInstruction::ExtractPenultimateElement, FOR->getBackedgeValue(), {},
+ "vector.recur.extract.for.phi");
+ cast<VPInstruction>(&R)->replaceAllUsesWith(PenultimateElement);
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index f215476b1e163..a6418be0a89b4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -370,27 +370,21 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
R.addOperand(getValueForPart(Op1, Part));
continue;
}
- if (match(&R, m_ExtractLastElement(m_VPValue(Op0))) ||
+
+ if (match(&R, m_ExtractFinalLane(m_VPValue(Op0))) ||
match(&R, m_ExtractPenultimateElement(m_VPValue(Op0)))) {
addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
- if (isa<VPFirstOrderRecurrencePHIRecipe>(Op0)) {
- assert(match(&R, m_ExtractLastElement(m_VPValue())) &&
- "can only extract last element of FOR");
- continue;
- }
-
if (Plan.hasScalarVFOnly()) {
auto *I = cast<VPInstruction>(&R);
- // Extracting from end with VF = 1 implies retrieving the last or
- // penultimate scalar part (UF-1 or UF-2).
- unsigned Offset =
- I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
- I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
- R.eraseFromParent();
- } else {
- // Otherwise we extract from the last part.
- remapOperands(&R, UF - 1);
+ bool IsPenultimatePart =
+ I->getOpcode() == VPInstruction::ExtractPenultimateElement;
+ unsigned PartIdx = IsPenultimatePart ? UF - 2 : UF - 1;
+ // For scalar VF, directly use the scalar part value.
+ I->replaceAllUsesWith(getValueForPart(Op0, PartIdx));
+ continue;
}
+ // For vector VF, always extract from the last part.
+ R.setOperand(0, getValueForPart(Op0, UF - 1));
continue;
}
@@ -497,8 +491,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
[[maybe_unused]] bool Matched =
match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
assert(Matched && "original op must have been Unpack");
+ auto *ExtractPart =
+ Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op});
NewOps.push_back(
- Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
+ Builder.createNaryOp(VPInstruction::ExtractLastLane, {ExtractPart}));
continue;
}
if (vputils::isSingleScalar(Op)) {
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index 5cb8bd911df75..e80fa3edba0c7 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -37,8 +37,10 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element ir<%for.1.next>
-; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-element vp<[[FOR1_SPLICE]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_2_PART:%.+]]> = extract-last-part vp<[[FOR1_SPLICE]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-lane vp<[[RESUME_2_PART]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -117,9 +119,12 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element ir<%for.1.next>
-; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-element vp<[[FOR1_SPLICE]]>
-; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-last-element vp<[[FOR2_SPLICE]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_2_PART:%.+]]> = extract-last-part vp<[[FOR1_SPLICE]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-lane vp<[[RESUME_2_PART]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_3_PART:%.+]]> = extract-last-part vp<[[FOR2_SPLICE]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-last-lane vp<[[RESUME_3_PART]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -203,8 +208,10 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-element ir<%for.x.next>
-; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-element ir<%for.x.prev>
+; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next>
+; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]>
+; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev>
+; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]>
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]>
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph
@@ -282,8 +289,10 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-element ir<%for.x.next>
-; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-element ir<%for.x.prev>
+; CHECK-NEXT: EMIT vp<[[EXT_X_PART:%.+]]> = extract-last-part ir<%for.x.next>
+; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane vp<[[EXT_X_PART]]>
+; CHECK-NEXT: EMIT vp<[[EXT_Y_PART:%.+]]> = extract-last-part ir<%for.x.prev>
+; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane vp<[[EXT_Y_PART]]>
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]>
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 64caecc847096..bbd596a772c53 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -219,7 +219,8 @@ exit:
; DBG-NEXT: Successor(s): middle.block
; DBG-EMPTY:
; DBG-NEXT: middle.block:
-; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element vp<[[SCALAR_STEPS]]>
+; DBG-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part vp<[[SCALAR_STEPS]]>
+; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]>
; DBG-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]>
; DBG-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; DBG-NEXT: Successor(s): ir-bb<exit>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 0ad720825336b..0fde76477b9f9 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -1117,7 +1117,8 @@ define i64 @print_ext_mul_two_uses(i64 %n, ptr %a, i16 %b, i32 %c) {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<%7> = compute-reduction-result ir<%res2>, vp<%5>
-; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-element ir<%load.ext.ext>
+; CHECK-NEXT: EMIT vp<[[EXT_PART:%.+]]> = extract-last-part ir<%load.ext.ext>
+; CHECK-NEXT: EMIT vp<%vector.recur.extract> = extract-last-lane vp<[[EXT_PART]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq vp<%2>, vp<%1>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 7c562be8f65af..6b9e0a267560c 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -536,7 +536,8 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXIT:%.+]]> = extract-last-element ir<%add>
+; CHECK-NEXT: EMIT vp<[[EXIT_PART:%.+]]> = extract-last-part ir<%add>
+; CHECK-NEXT: EMIT vp<[[EXIT:%.+]]> = extract-last-lane vp<[[EXIT_PART]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -903,7 +904,8 @@ define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx>
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
; CHECK-NEXT: WIDEN-CAST ir<%zext> = zext nneg ir<%l>
-; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-element ir<%zext>
+; CHECK-NEXT: EMIT vp<[[EXT_PART:%.+]]> = extract-last-part ir<%zext>
+; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-lane vp<[[EXT_PART]]>
; CHECK-NEXT: CLONE store vp<[[EXT]]>, ir<%p1>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
@@ -960,7 +962,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-element ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<[[RESUME_1_PART:%.+]]> = extract-last-part ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[RESUME_1_PART]]>
; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-penultimate-element ir<%for.1.next>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
>From d2c82259f2a166a9d60ca4efe7832604254cabf4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 1 Dec 2025 17:18:35 +0000
Subject: [PATCH 2/4] !fixup fix unit test build failure
---
llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index ec8f06a1eb755..97783a63c8fb9 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -385,7 +385,7 @@ TEST_F(VPIRVerifierTest, testVerifyIRPhiInExitVPIRBB) {
auto *HeaderBlock =
cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getEntry());
VPInstruction *DefI =
- new VPInstruction(VPInstruction::ExtractLastElement,
+ new VPInstruction(VPInstruction::ExtractLastLane,
{HeaderBlock->front().getVPSingleValue()});
DefI->insertBefore(Plan->getMiddleBlock()->getTerminator());
Plan->getExitBlocks()[0]->front().addOperand(DefI);
>From 64b32feb05a21e370f7d4d592ab1e7b6b0acb781 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 6 Dec 2025 21:07:02 +0000
Subject: [PATCH 3/4] !fixup update to main, use more verbose naming, simplify
code a bit
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++------
llvm/lib/Transforms/Vectorize/VPlan.h | 8 ++++----
llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 5 +----
llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 2 +-
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 3 ++-
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++---
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +-
7 files changed, 14 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 536b45ab497bf..4edc004f161a1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8982,12 +8982,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
U->replaceUsesOfWith(OrigExitingVPV, FinalReductionResult);
// Look through ExtractLastPart.
- if (match(U, m_ExtractLastPart(m_VPValue()))) {
- auto *ExtractPart = cast<VPInstruction>(U);
- if (ExtractPart->getNumUsers() != 1)
- continue;
- U = *ExtractPart->user_begin();
- }
+ if (match(U, m_ExtractLastPart(m_VPValue())))
+ U = cast<VPInstruction>(U)->getSingleUser();
if (match(U, m_CombineOr(m_ExtractLane(m_VPValue(), m_VPValue()),
m_ExtractLastLane(m_VPValue()))))
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index dc8d5de953c15..972823137023f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1464,10 +1464,10 @@ class VPIRInstruction : public VPRecipeBase {
return true;
}
- /// Update the recipe's first operand to the final lane of the operand using
- /// \p Builder. Must only be used for VPIRInstructions with at least one
- /// operand wrapping a PHINode.
- void extractFinalLaneOfFirstOperand(VPBuilder &Builder);
+ /// Update the recipe's first operand to the last lane of the last part of the
+ /// operand using \p Builder. Must only be used for VPIRInstructions with at
+ /// least one operand wrapping a PHINode.
+ void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder);
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index c82322e304e38..c64b97579881a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -126,11 +126,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return VecTy->getElementType();
return BaseTy;
}
- case VPInstruction::ExtractLastPart: {
- // Element type of ExtractLastPart is equal to the element type of its
- // operand.
+ case VPInstruction::ExtractLastPart:
return inferScalarType(R->getOperand(0));
- }
case VPInstruction::LogicalAnd:
assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 4a233f7935f90..f082b970c7762 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -414,7 +414,7 @@ template <typename Op0_t>
inline VPInstruction_match<
VPInstruction::ExtractLastLane,
VPInstruction_match<VPInstruction::ExtractLastPart, Op0_t>>
-m_ExtractFinalLane(const Op0_t &Op0) {
+m_ExtractLastLaneOfLastPart(const Op0_t &Op0) {
return m_ExtractLastLane(m_ExtractLastPart(Op0));
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a38e2100a51b..519a104b9484f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1497,7 +1497,8 @@ InstructionCost VPIRInstruction::computeCost(ElementCount VF,
return 0;
}
-void VPIRInstruction::extractFinalLaneOfFirstOperand(VPBuilder &Builder) {
+void VPIRInstruction::extractLastLaneOfLastPartOfFirstOperand(
+ VPBuilder &Builder) {
assert(isa<PHINode>(getInstruction()) &&
"can only update exiting operands to phi nodes");
assert(getNumOperands() > 0 && "must have at least one operand");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e8122b09833bd..6a196e5e02ed1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -946,7 +946,7 @@ static VPValue *optimizeLatchExitInductionUser(
VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op,
DenseMap<VPValue *, VPValue *> &EndValues, ScalarEvolution &SE) {
VPValue *Incoming;
- if (!match(Op, m_ExtractFinalLane(m_VPValue(Incoming))))
+ if (!match(Op, m_ExtractLastLaneOfLastPart(m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -3764,7 +3764,7 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
if (ExitIRI->getNumOperands() != 1) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
// predecessor. Extract its final lane.
- ExitIRI->extractFinalLaneOfFirstOperand(MiddleBuilder);
+ ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
}
VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
@@ -5093,7 +5093,7 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
// the VPIRInstruction modeling the phi.
for (VPRecipeBase &R : make_early_inc_range(
make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
- if (!match(&R, m_ExtractFinalLane(m_Specific(FOR))))
+ if (!match(&R, m_ExtractLastLaneOfLastPart(m_Specific(FOR))))
continue;
// For VF vscale x 1, if vscale = 1, we are unable to extract the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index a6418be0a89b4..6fb706ea7d64b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -371,7 +371,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
continue;
}
- if (match(&R, m_ExtractFinalLane(m_VPValue(Op0))) ||
+ if (match(&R, m_ExtractLastLaneOfLastPart(m_VPValue(Op0))) ||
match(&R, m_ExtractPenultimateElement(m_VPValue(Op0)))) {
addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
if (Plan.hasScalarVFOnly()) {
>From d92401118086fb2ed934be80f08e3bdde2f624e8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 7 Dec 2025 14:37:22 +0000
Subject: [PATCH 4/4] !fixup fix formatting
---
llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 254baeffbfd85..03b0ac1b03ff9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -499,7 +499,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
ExitIRI->getParent()->getSinglePredecessor() == MiddleVPBB &&
"exit values from early exits must be fixed when branch to "
"early-exit is added");
- ExitIRI->extractFinalLaneOfFirstOperand(B);
+ ExitIRI->extractLastLaneOfLastPartOfFirstOperand(B);
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6a196e5e02ed1..320baeb454d46 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4992,9 +4992,9 @@ void VPlanTransforms::updateScalarResumePhis(
if (IsFOR) {
auto *ExtractPart = MiddleBuilder.createNaryOp(
VPInstruction::ExtractLastPart, ResumeFromVectorLoop);
- ResumeFromVectorLoop =
- MiddleBuilder.createNaryOp(VPInstruction::ExtractLastLane,
- ExtractPart, DebugLoc::getUnknown(), "vector.recur.extract");
+ ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastLane, ExtractPart, DebugLoc::getUnknown(),
+ "vector.recur.extract");
}
ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");
ResumePhiR->setOperand(0, ResumeFromVectorLoop);
More information about the llvm-commits mailing list