[llvm] [VPlan] Replace ExtractFromEnd with Extract(Last|Penultimate)Lane (NFC). (PR #137030)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 11:01:57 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
ExtractFromEnd only has 2 uses, extracting the last and penultimate lanes. Replace it with 2 separate opcodes, removing the need to materialize and handle a constant argument.
---
Patch is 20.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137030.diff
13 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+2-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+4-4)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+2-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+13-14)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+2-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+8-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+3-3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e2f7c36cecd9d..0ee4216933cee 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9326,8 +9326,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPBuilder ScalarPHBuilder(ScalarPH);
- VPValue *OneVPV = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
for (VPRecipeBase &ScalarPhiR : Plan.getScalarHeader()->phis()) {
auto *ScalarPhiIRI = cast<VPIRPhi>(&ScalarPhiR);
@@ -9362,7 +9360,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
"Cannot handle loops with uncountable early exits");
if (IsFOR)
ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractFromEnd, {ResumeFromVectorLoop, OneVPV}, {},
+ VPInstruction::ExtractLastLane, {ResumeFromVectorLoop}, {},
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
@@ -9440,8 +9438,6 @@ static void addExitUsersForFirstOrderRecurrences(
auto *MiddleVPBB = Plan.getMiddleBlock();
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
- VPValue *TwoVPV = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
@@ -9525,7 +9521,7 @@ static void addExitUsersForFirstOrderRecurrences(
if (ExitIRI->getOperand(0) != FOR)
continue;
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
- VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
+ VPInstruction::ExtractPenultimateLane, {FOR->getBackedgeValue()}, {},
"vector.recur.extract.for.phi");
ExitIRI->setOperand(0, PenultimateElement);
ExitUsersToFix.remove(ExitIRI);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0f2aac146e7a6..94382a2d7af57 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -878,10 +878,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
Broadcast,
ComputeFindLastIVResult,
ComputeReductionResult,
- // Takes the VPValue to extract from as first operand and the lane or part
- // to extract as second operand, counting from the end starting with 1 for
- // last. The second operand must be a positive constant and <= VF.
- ExtractFromEnd,
+ // Extracts the last lane from its operand.
+ ExtractLastLane,
+ // Extracts the second-to-last lane from its operand.
+ ExtractPenultimateLane,
LogicalAnd, // Non-poison propagating logical And.
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 375d4c9787994..946f1db4a8885 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -88,7 +88,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return SetResultTyFromOp();
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
- case VPInstruction::ExtractFromEnd: {
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractPenultimateLane: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
return VecTy->getElementType();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a342d5182c974..d6fbfb56b967a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -725,10 +725,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
return ReducedPartRdx;
}
- case VPInstruction::ExtractFromEnd: {
- auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
- unsigned Offset = CI->getZExtValue();
- assert(Offset > 0 && "Offset from end must be positive");
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractPenultimateLane: {
+ unsigned Offset = getOpcode() == VPInstruction::ExtractLastLane ? 1 : 2;
Value *Res;
if (State.VF.isVector()) {
assert(Offset <= State.VF.getKnownMinValue() &&
@@ -854,7 +853,8 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
bool VPInstruction::isVectorToScalar() const {
- return getOpcode() == VPInstruction::ExtractFromEnd ||
+ return getOpcode() == VPInstruction::ExtractLastLane ||
+ getOpcode() == VPInstruction::ExtractPenultimateLane ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeFindLastIVResult ||
@@ -924,7 +924,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::AnyOf:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
- case VPInstruction::ExtractFromEnd:
+ case VPInstruction::ExtractLastLane:
+ case VPInstruction::ExtractPenultimateLane:
case VPInstruction::FirstActiveLane:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
@@ -1042,8 +1043,11 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::Broadcast:
O << "broadcast";
break;
- case VPInstruction::ExtractFromEnd:
- O << "extract-from-end";
+ case VPInstruction::ExtractLastLane:
+ O << "extract-last-lane";
+ break;
+ case VPInstruction::ExtractPenultimateLane:
+ O << "extract-penultimate-lane";
break;
case VPInstruction::ComputeFindLastIVResult:
O << "compute-find-last-iv-result";
@@ -1143,12 +1147,7 @@ void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) {
assert(getNumOperands() == 1 && "must have a single operand");
VPValue *Exiting = getOperand(0);
if (!Exiting->isLiveIn()) {
- LLVMContext &Ctx = getInstruction().getContext();
- auto &Plan = *getParent()->getPlan();
- Exiting = Builder.createNaryOp(
- VPInstruction::ExtractFromEnd,
- {Exiting,
- Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::get(Ctx, 32), 1))});
+ Exiting = Builder.createNaryOp(VPInstruction::ExtractLastLane, {Exiting});
}
setOperand(0, Exiting);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f2dc68b2ea8b6..f5e4a466f4b37 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -827,8 +827,8 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
using namespace VPlanPatternMatch;
VPValue *Incoming;
- if (!match(Op, m_VPInstruction<VPInstruction::ExtractFromEnd>(
- m_VPValue(Incoming), m_SpecificInt(1))))
+ if (!match(Op, m_VPInstruction<VPInstruction::ExtractLastLane>(
+ m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index b48a447834cc8..e404b1d09b452 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -337,14 +337,19 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
continue;
}
VPValue *Op0;
- if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(
- m_VPValue(Op0), m_VPValue(Op1)))) {
+ if (match(&R, m_VPInstruction<VPInstruction::ExtractLastLane>(
+ m_VPValue(Op0))) ||
+ match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateLane>(
+ m_VPValue(Op0)))) {
addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
if (Plan.hasScalarVFOnly()) {
// Extracting from end with VF = 1 implies retrieving the scalar part UF
// - Op1.
unsigned Offset =
- cast<ConstantInt>(Op1->getLiveInIRValue())->getZExtValue();
+ match(&R,
+ m_VPInstruction<VPInstruction::ExtractLastLane>(m_VPValue()))
+ ? 1
+ : 2;
R.getVPSingleValue()->replaceAllUsesWith(
getValueForPart(Op0, UF - Offset));
R.eraseFromParent();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
index 24badfb7b1042..ef72d56d082d8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll
@@ -42,7 +42,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, ir<[[REDUCE]]>
-; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-last-lane vp<[[RED_RESULT]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -107,7 +107,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
-; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-last-lane vp<[[RED_RESULT]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, ir<1024>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index bfaa09241e30b..4fcb2c9fe96c3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -62,7 +62,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-OUTLOOP-EMPTY:
; IF-EVL-OUTLOOP-NEXT: middle.block:
; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, vp<[[RDX_SELECT]]>
-; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-from-end vp<[[RDX]]>, ir<1>
+; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-last-lane vp<[[RDX]]>
; IF-EVL-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>
; IF-EVL-OUTLOOP-EMPTY:
; IF-EVL-OUTLOOP-NEXT: ir-bb<for.end>:
@@ -102,7 +102,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: middle.block:
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
-; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-from-end vp<[[RDX]]>, ir<1>
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-last-lane vp<[[RDX]]>
; IF-EVL-INLOOP-NEXT: Successor(s): ir-bb<for.end>
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: ir-bb<for.end>:
@@ -137,7 +137,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: middle.block:
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
-; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-from-end vp<[[RDX]]>, ir<1>
+; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-last-lane vp<[[RDX]]>
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; NO-VP-OUTLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]>
; NO-VP-OUTLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
@@ -185,7 +185,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-EMPTY:
; NO-VP-INLOOP-NEXT: middle.block:
; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX:%.+]]> = compute-reduction-result ir<[[RDX_PHI]]>, ir<[[ADD]]>
-; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-from-end vp<[[RDX]]>, ir<1>
+; NO-VP-INLOOP-NEXT: EMIT vp<[[RDX_EX:%.+]]> = extract-last-lane vp<[[RDX]]>
; NO-VP-INLOOP-NEXT: EMIT vp<[[BOC:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; NO-VP-INLOOP-NEXT: EMIT branch-on-cond vp<[[BOC]]>
; NO-VP-INLOOP-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
index 8624d9201a7c8..0d47163e109cc 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -37,8 +37,8 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1>
-; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-lane vp<[[FOR1_SPLICE]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -117,9 +117,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1>
-; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1>
-; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]>.1 = extract-last-lane vp<[[FOR1_SPLICE]]>
+; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]>.2 = extract-last-lane vp<[[FOR2_SPLICE]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -203,8 +203,8 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-from-end ir<%for.x.next>, ir<1>
-; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-from-end ir<%for.x.prev>, ir<1>
+; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane ir<%for.x.next>
+; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane ir<%for.x.prev>
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]>
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph
@@ -282,8 +282,8 @@ define i32 @test_chained_first_order_recurrences_5_hoist_to_load(ptr %base) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-from-end ir<%for.x.next>, ir<1>
-; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-from-end ir<%for.x.prev>, ir<1>
+; CHECK-NEXT: EMIT vp<[[EXT_X:%.+]]> = extract-last-lane ir<%for.x.next>
+; CHECK-NEXT: EMIT vp<[[EXT_Y:%.+]]>.1 = extract-last-lane ir<%for.x.prev>
; CHECK-NEXT: EMIT vp<[[MIDDLE_C:%.+]]> = icmp eq ir<4098>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[MIDDLE_C]]>
; CHECK-NEXT: Successor(s): ir-bb<ret>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 794ff99a68672..0805cf3bcb01b 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -214,7 +214,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]>
-; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-last-lane vp<[[RED_RES]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<exit>
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index 671ed2af52621..dd400737bef01 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -220,7 +220,7 @@ exit:
; DBG-NEXT: Successor(s): middle.block
; DBG-EMPTY:
; DBG-NEXT: middle.block:
-; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end vp<[[SCALAR_STEPS]]>, ir<1>
+; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-last-lane vp<[[SCALAR_STEPS]]>
; DBG-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, vp<[[VEC_TC]]>
; DBG-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; DBG-NEXT: Successor(s): ir-bb<exit>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index d0d276ab967fa..0709372890dac 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -35,7 +35,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%red>, ir<%red.next>
-; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-last-lane vp<[[RED_RES]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -176,7 +176,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%sum.07>, ir<[[MULADD]]>
-; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-from-end vp<[[RED_RES]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-last-lane vp<[[RED_RES]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
@@ -238,7 +238,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, ir<%start>, ir<%cond>
-; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-from-end vp<[[RDX_RES]]>, ir<1>
+; CHECK-NEXT: EMIT vp<[[EXT:%.+]]> = extract-last-lane vp<[[RDX_RES]]>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index fc80d348042a0..b4e97a401c58e 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -538,7 +538,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[EXIT:%.+]]> = extract-from-end ir<%add>, ir<1>
+; CHECK-NEXT: EMIT vp<[[EXIT:%.+]]> = extr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/137030
More information about the llvm-commits
mailing list