[llvm] [VPlan] Replace ExtractLast(Elem|LanePerPart) with ExtractLast(Lane/Part) (PR #164124)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 18 13:36:57 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
…
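Replace ExtractLastElement and ExtractLastLanePerPart with two narrower, composable opcodes, ExtractLastPart and ExtractLastLane, which separately model the two steps of the extraction: ExtractLastPart extracts the last part of its operand, and ExtractLastLane extracts the last lane of the current part of its operand. ExtractLastElement == ExtractLastLane(ExtractLastPart), and ExtractLastLanePerPart == ExtractLastLane; the new name for the latter makes clearer what the opcode does. A new m_ExtractLastElement matcher, which matches ExtractLastLane(ExtractLastPart(x)), is provided for convenience.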
The patch should be NFC (no functional change), apart from changes to the printed VPlan output.
---
Patch is 39.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164124.diff
14 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+4-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+5-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+14-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+13-14)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+30-32)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+22-15)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+14-6)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+18-9)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+3-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll (+18-8)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+8-4)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0e0b0427ae488..a36ff4aad93aa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1008,12 +1008,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ComputeAnyOfResult,
ComputeFindIVResult,
ComputeReductionResult,
- // Extracts the last lane from its operand if it is a vector, or the last
- // part if scalar. In the latter case, the recipe will be removed during
- // unrolling.
- ExtractLastElement,
- // Extracts the last lane for each part from its operand.
- ExtractLastLanePerPart,
+ // Extracts the last part of its operand.
+ ExtractLastPart,
+ // Extracts the last lane of the current part of its operand.
+ ExtractLastLane,
// Extracts the second-to-last lane from its operand or the second-to-last
// part if it is scalar. In the latter case, the recipe will be removed
// during unrolling.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index f413c63c6d14c..276f7e03ea0be 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -115,14 +115,17 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return inferScalarType(R->getOperand(1));
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
- case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractLastLanePerPart:
+ case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractPenultimateElement: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
return VecTy->getElementType();
return BaseTy;
}
+ case VPInstruction::ExtractLastPart: {
+ // ExtractLastPart returns the same type as its operand
+ return inferScalarType(R->getOperand(0));
+ }
case VPInstruction::LogicalAnd:
assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index d8203e251a5d1..a0b7fde957756 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -383,15 +383,23 @@ m_EVL(const Op0_t &Op0) {
}
template <typename Op0_t>
-inline VPInstruction_match<VPInstruction::ExtractLastElement, Op0_t>
-m_ExtractLastElement(const Op0_t &Op0) {
- return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
+inline VPInstruction_match<VPInstruction::ExtractLastLane, Op0_t>
+m_ExtractLastLane(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastLane>(Op0);
}
template <typename Op0_t>
-inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
-m_ExtractLastLanePerPart(const Op0_t &Op0) {
- return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0);
+inline VPInstruction_match<VPInstruction::ExtractLastPart, Op0_t>
+m_ExtractLastPart(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastPart>(Op0);
+}
+
+template <typename Op0_t>
+inline VPInstruction_match<
+ VPInstruction::ExtractLastLane,
+ VPInstruction_match<VPInstruction::ExtractLastPart, Op0_t>>
+m_ExtractLastElement(const Op0_t &Op0) {
+ return m_ExtractLastLane(m_ExtractLastPart(Op0));
}
template <typename Op0_t, typename Op1_t, typename Op2_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7a98c7595fe6a..331c1109a55ef 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -510,8 +510,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
- case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractLastLanePerPart:
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractLastPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
@@ -879,8 +879,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
return ReducedPartRdx;
}
- case VPInstruction::ExtractLastLanePerPart:
- case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractPenultimateElement: {
unsigned Offset =
getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;
@@ -1148,7 +1147,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
I32Ty, {Arg0Ty, I32Ty, I1Ty});
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
}
- case VPInstruction::ExtractLastElement: {
+ case VPInstruction::ExtractLastLane: {
// Add on the cost of extracting the element.
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
@@ -1168,8 +1167,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
bool VPInstruction::isVectorToScalar() const {
- return getOpcode() == VPInstruction::ExtractLastElement ||
- getOpcode() == VPInstruction::ExtractLastLanePerPart ||
+ return getOpcode() == VPInstruction::ExtractLastLane ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::ExtractLane ||
@@ -1232,8 +1230,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExtractLane:
- case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractLastLanePerPart:
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractLastPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::FirstActiveLane:
@@ -1378,11 +1376,11 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractLane:
O << "extract-lane";
break;
- case VPInstruction::ExtractLastElement:
- O << "extract-last-element";
+ case VPInstruction::ExtractLastLane:
+ O << "extract-last-lane";
break;
- case VPInstruction::ExtractLastLanePerPart:
- O << "extract-last-lane-per-part";
+ case VPInstruction::ExtractLastPart:
+ O << "extract-last-part";
break;
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
@@ -1542,7 +1540,8 @@ void VPIRInstruction::extractLastLaneOfFirstOperand(VPBuilder &Builder) {
if (Exiting->isLiveIn())
return;
- Exiting = Builder.createNaryOp(VPInstruction::ExtractLastElement, {Exiting});
+ Exiting = Builder.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
+ Exiting = Builder.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
setOperand(0, Exiting);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cae9aee82c9c3..27be581a6849c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1206,9 +1206,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
- // Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
- m_ExtractLastLanePerPart(m_BuildVector())))) {
+ // Look through ExtractLastLane (BuildVector ....).
+ if (match(&R, m_ExtractLastLane(m_BuildVector()))) {
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1274,15 +1273,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
- if (match(Def,
- m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
- m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
+ if (match(Def, m_ExtractLastLane(m_Broadcast(m_VPValue(A))))) {
Def->replaceAllUsesWith(A);
return;
}
- if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
- m_ExtractLastLanePerPart(m_VPValue(A)))) &&
+ if (match(Def, m_ExtractLastLane(m_VPValue(A))) &&
((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
(isa<VPReplicateRecipe>(A) &&
cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1291,11 +1287,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(A);
}
- if (Plan->getUF() == 1 &&
- match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
- return Def->replaceAllUsesWith(
- Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
- }
+ if (Plan->getUF() == 1 && match(Def, m_ExtractLastPart(m_VPValue(A))))
+ return Def->replaceAllUsesWith(A);
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1333,13 +1326,14 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
Clone->insertBefore(RepOrWidenR);
- unsigned ExtractOpc =
- vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
- ? VPInstruction::ExtractLastElement
- : VPInstruction::ExtractLastLanePerPart;
- auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
- Ext->insertBefore(Clone);
- Clone->setOperand(0, Ext);
+ VPBuilder Builder(Clone);
+ VPValue *ExtractOp = Clone->getOperand(0);
+ if (vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)))
+ ExtractOp =
+ Builder.createNaryOp(VPInstruction::ExtractLastPart, ExtractOp);
+ ExtractOp =
+ Builder.createNaryOp(VPInstruction::ExtractLastLane, ExtractOp);
+ Clone->setOperand(0, ExtractOp);
RepR->eraseFromParent();
continue;
}
@@ -1350,9 +1344,7 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
if (!vputils::isSingleScalar(RepOrWidenR) ||
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
return U->usesScalars(RepOrWidenR) ||
- match(cast<VPRecipeBase>(U),
- m_CombineOr(m_ExtractLastElement(m_VPValue()),
- m_ExtractLastLanePerPart(m_VPValue())));
+ match(cast<VPRecipeBase>(U), m_ExtractLastPart(m_VPValue()));
}))
continue;
@@ -4316,10 +4308,13 @@ void VPlanTransforms::addScalarResumePhis(
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
"Cannot handle loops with uncountable early exits");
- if (IsFOR)
- ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
- "vector.recur.extract");
+ if (IsFOR) {
+ auto *ExtractPart = MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractLastPart, ResumeFromVectorLoop);
+ ResumeFromVectorLoop =
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractLastLane,
+ ExtractPart, {}, "vector.recur.extract");
+ }
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
@@ -4417,10 +4412,11 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
// the VPIRInstruction modeling the phi.
- for (VPUser *U : FOR->users()) {
- using namespace llvm::VPlanPatternMatch;
- if (!match(U, m_ExtractLastElement(m_Specific(FOR))))
+ for (VPRecipeBase &R : make_early_inc_range(
+ make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
+ if (!match(&R, m_ExtractLastElement(m_Specific(FOR))))
continue;
+
// For VF vscale x 1, if vscale = 1, we are unable to extract the
// penultimate value of the recurrence. Instead we rely on the existing
// extract of the last element from the result of
@@ -4430,9 +4426,11 @@ void VPlanTransforms::addExitUsersForFirstOrderRecurrences(VPlan &Plan,
Range))
return;
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractPenultimateElement, {FOR->getBackedgeValue()},
+ VPInstruction::ExtractPenultimateElement,
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart,
+ FOR->getBackedgeValue()),
{}, "vector.recur.extract.for.phi");
- cast<VPInstruction>(U)->replaceAllUsesWith(PenultimateElement);
+ cast<VPInstruction>(&R)->replaceAllUsesWith(PenultimateElement);
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5aeda3e11b138..96dc1d8d2525a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -372,22 +372,27 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
R.addOperand(getValueForPart(Op1, Part));
continue;
}
- if (match(&R, m_ExtractLastElement(m_VPValue(Op0))) ||
- match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
- m_VPValue(Op0)))) {
- addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
+
+ // Handle extraction from the last part. For scalar VF, directly replace
+ // with the appropriate scalar part. Otherwise, update operand to use the
+ // part.
+ if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
+ m_ExtractLastPart(m_VPValue(Op0)))) ||
+ match(&R, m_ExtractLastElement(m_VPValue(Op0)))) {
+ auto *I = cast<VPInstruction>(&R);
+ bool IsPenultimate =
+ I->getOpcode() == VPInstruction::ExtractPenultimateElement;
+ unsigned PartIdx = IsPenultimate ? UF - 2 : UF - 1;
+
if (Plan.hasScalarVFOnly()) {
- auto *I = cast<VPInstruction>(&R);
- // Extracting from end with VF = 1 implies retrieving the last or
- // penultimate scalar part (UF-1 or UF-2).
- unsigned Offset =
- I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
- I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
- R.eraseFromParent();
- } else {
- // Otherwise we extract from the last part.
- remapOperands(&R, UF - 1);
+ // For scalar VF, directly use the scalar part value.
+ addUniformForAllParts(I);
+ I->replaceAllUsesWith(getValueForPart(Op0, PartIdx));
+ continue;
}
+ // For vector VF, extract from the last part.
+ addUniformForAllParts(I);
+ R.setOperand(0, getValueForPart(Op0, UF - 1));
continue;
}
@@ -480,8 +485,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
continue;
}
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
+ auto *ExtractPart =
+ Builder.createNaryOp(VPInstruction::ExtractLastPart, {Op});
NewOps.push_back(
- Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
+ Builder.createNaryOp(VPInstruction::ExtractLastLane, {ExtractPart}));
continue;
}
if (vputils::isSingleScalar(Op)) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index d4e5dea3d4aab..77e6556535863 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -42,7 +42,9 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, ir<[[REDUCE]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT_PART:%.+]]> = compute-reduction-result ir<[[ACC]]>, ir<[[REDUCE]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT_PART2:%.+]]> = extract-last-part vp<[[RED_RESULT_PART]]>
+; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = extract-last-lane vp<[[RED_RESULT_PART2]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -53,7 +55,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VEC_TC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<[[RED_RESULT_PART]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index dff4971ffdfa1..7932adbe158b8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -58,7 +58,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-NEXT: Successor(s): middle.block
; IF-EVL-OUTLOOP-EMPTY:
; IF-EVL-OUTLOOP-NEXT: middle.block:
-; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]>
; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>
; IF-EVL-OUTLOOP-EMPTY:
; IF-EVL-OUTLOOP-NEXT: ir-bb<for.end>:
@@ -97,7 +99,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: Successor(s): middle.block
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: middle.block:
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]>
; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.end>
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: ir-bb<for.end>:
@@ -131,7 +135,9 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: Successor(s): middle.block
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: middle.block:
-; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_PART:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
+; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_PART2:%.+]]> = extract-last-part vp<[[RDX_PART]]>
+; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = extract-last-lane vp<[[RDX_PART2]]>
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; NO-VP-OUTLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]>
; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
@@ -142,7 +148,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: scalar.ph:
; NO-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/164124
More information about the llvm-commits mailing list