[llvm] [VPlan] Model FOR resume value extraction in VPlan. (PR #93396)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 25 21:32:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
This patch introduces a new ExtractRecurrenceResume VPInstruction opcode
to extract the value of a FOR to be used as resume value for the ph in
the scalar loop.
Note that it takes a VPFirstOrderRecurrencePHIRecipe as operand. This is
needed at the moment to conveniently let fixFixedOrderRecurrence still
handle creating and patching up the phis in the scalar pre-header and
header. As ExtractRecurrenceResume won't have any users yet, it is
marked as having side-effects until the phi in the scalar pre-header is
also created and managed by VPlan.
Depends on https://github.com/llvm/llvm-project/pull/93395
---
Patch is 80.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93396.diff
21 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+16-113)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+7-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+43)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+59)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr72969.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+5)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+6)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+34-42)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+1)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+8-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+3-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 48981a6bd39e3..6e8bd83473148 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -608,8 +608,7 @@ class InnerLoopVectorizer {
/// Create the exit value of first order recurrences in the middle block and
/// update their users.
- void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
- VPTransformState &State);
+ void fixFixedOrderRecurrence(VPInstruction *VPI, VPTransformState &State);
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
@@ -3391,16 +3390,18 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
fixNonInductionPHIs(Plan, State);
// At this point every instruction in the original loop is widened to a
- // vector form. Now we need to fix the recurrences in the loop. These PHI
- // nodes are currently empty because we did not want to introduce cycles.
- // This is the second stage of vectorizing recurrences. Note that fixing
- // reduction phis are already modeled in VPlan.
- // TODO: Also model fixing fixed-order recurrence phis in VPlan.
+ // vector form. Note that fixing reduction phis, as well as extracting the
+ // exit and resume values for fixed-order recurrences are already modeled in
+ // VPlan. All that remains to do here is creating a phi in the scalar
+ // pre-header for each fixed-rder recurrence resume value.
+ // TODO: Also model creating phis in the scalar pre-header in VPlan.
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
- VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
- for (VPRecipeBase &R : HeaderVPBB->phis()) {
- if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
- fixFixedOrderRecurrence(FOR, State);
+ VPBasicBlock *ExitVPBB =
+ cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
+ for (VPRecipeBase &R : *ExitVPBB) {
+ if (auto *VPI = dyn_cast<VPInstruction>(&R))
+ if (VPI->getOpcode() == VPInstruction::ExtractRecurrenceResume)
+ fixFixedOrderRecurrence(VPI, State);
}
// Forget the original basic block.
@@ -3469,111 +3470,13 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VF.getKnownMinValue() * UF);
}
-void InnerLoopVectorizer::fixFixedOrderRecurrence(
- VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
- // This is the second phase of vectorizing first-order recurrences. An
- // overview of the transformation is described below. Suppose we have the
- // following loop.
- //
- // for (int i = 0; i < n; ++i)
- // b[i] = a[i] - a[i - 1];
- //
- // There is a first-order recurrence on "a". For this loop, the shorthand
- // scalar IR looks like:
- //
- // scalar.ph:
- // s_init = a[-1]
- // br scalar.body
- //
- // scalar.body:
- // i = phi [0, scalar.ph], [i+1, scalar.body]
- // s1 = phi [s_init, scalar.ph], [s2, scalar.body]
- // s2 = a[i]
- // b[i] = s2 - s1
- // br cond, scalar.body, ...
- //
- // In this example, s1 is a recurrence because it's value depends on the
- // previous iteration. In the first phase of vectorization, we created a
- // vector phi v1 for s1. We now complete the vectorization and produce the
- // shorthand vector IR shown below (for VF = 4, UF = 1).
- //
- // vector.ph:
- // v_init = vector(..., ..., ..., a[-1])
- // br vector.body
- //
- // vector.body
- // i = phi [0, vector.ph], [i+4, vector.body]
- // v1 = phi [v_init, vector.ph], [v2, vector.body]
- // v2 = a[i, i+1, i+2, i+3];
- // v3 = vector(v1(3), v2(0, 1, 2))
- // b[i, i+1, i+2, i+3] = v2 - v3
- // br cond, vector.body, middle.block
- //
- // middle.block:
- // x = v2(3)
- // br scalar.ph
- //
- // scalar.ph:
- // s_init = phi [x, middle.block], [a[-1], otherwise]
- // br scalar.body
- //
- // After execution completes the vector loop, we extract the next value of
- // the recurrence (x) to use as the initial value in the scalar loop.
-
+void InnerLoopVectorizer::fixFixedOrderRecurrence(VPInstruction *VPI,
+ VPTransformState &State) {
// Extract the last vector element in the middle block. This will be the
// initial value for the recurrence when jumping to the scalar loop.
- VPValue *PreviousDef = PhiR->getBackedgeValue();
- Value *Incoming = State.get(PreviousDef, UF - 1);
- auto *ExtractForScalar = Incoming;
- auto *IdxTy = Builder.getInt32Ty();
- Value *RuntimeVF = nullptr;
- if (VF.isVector()) {
- auto *One = ConstantInt::get(IdxTy, 1);
- Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
- RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
- auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- ExtractForScalar =
- Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
- }
-
- auto RecurSplice = cast<VPInstruction>(*PhiR->user_begin());
- assert(PhiR->getNumUsers() == 1 &&
- RecurSplice->getOpcode() ==
- VPInstruction::FirstOrderRecurrenceSplice &&
- "recurrence phi must have a single user: FirstOrderRecurrenceSplice");
- SmallVector<VPLiveOut *> LiveOuts;
- for (VPUser *U : RecurSplice->users())
- if (auto *LiveOut = dyn_cast<VPLiveOut>(U))
- LiveOuts.push_back(LiveOut);
-
- if (!LiveOuts.empty()) {
- // Extract the second last element in the middle block if the
- // Phi is used outside the loop. We need to extract the phi itself
- // and not the last element (the phi update in the current iteration). This
- // will be the value when jumping to the exit block from the
- // LoopMiddleBlock, when the scalar loop is not run at all.
- Value *ExtractForPhiUsedOutsideLoop = nullptr;
- if (VF.isVector()) {
- auto *Idx = Builder.CreateSub(RuntimeVF, ConstantInt::get(IdxTy, 2));
- ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
- Incoming, Idx, "vector.recur.extract.for.phi");
- } else {
- assert(UF > 1 && "VF and UF cannot both be 1");
- // When loop is unrolled without vectorizing, initialize
- // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled
- // value of `Incoming`. This is analogous to the vectorized case above:
- // extracting the second last element when VF > 1.
- ExtractForPhiUsedOutsideLoop = State.get(PreviousDef, UF - 2);
- }
-
- for (VPLiveOut *LiveOut : LiveOuts) {
- assert(!Cost->requiresScalarEpilogue(VF.isVector()));
- PHINode *LCSSAPhi = LiveOut->getPhi();
- LCSSAPhi->addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
- State.Plan->removeLiveOut(LCSSAPhi);
- }
- }
+ Value *ExtractForScalar = State.get(VPI, UF - 1, true);
+ auto *PhiR = cast<VPFirstOrderRecurrencePHIRecipe>(VPI->getOperand(0));
// Fix the initial value of the original recurrence in the scalar loop.
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3aee17921086d..6c63c4715f697 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -167,8 +167,8 @@ class VPLane {
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
- static VPLane getLastLaneForVF(const ElementCount &VF) {
- unsigned LaneOffset = VF.getKnownMinValue() - 1;
+ static VPLane getLastLaneForVF(const ElementCount &VF, unsigned Offset = 1) {
+ unsigned LaneOffset = VF.getKnownMinValue() - Offset;
Kind LaneKind;
if (VF.isScalable())
// In this case 'LaneOffset' refers to the offset from the start of the
@@ -1179,6 +1179,8 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
+ ExtractRecurrenceResult,
+ ExtractRecurrenceResume,
LogicalAnd, // Non-poison propagating logical And.
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
@@ -3612,7 +3614,9 @@ inline bool isUniformAfterVectorization(VPValue *VPV) {
if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
return all_of(GEP->operands(), isUniformAfterVectorization);
if (auto *VPI = dyn_cast<VPInstruction>(Def))
- return VPI->getOpcode() == VPInstruction::ComputeReductionResult;
+ return VPI->getOpcode() == VPInstruction::ComputeReductionResult ||
+ VPI->getOpcode() == VPInstruction::ExtractRecurrenceResult ||
+ VPI->getOpcode() == VPInstruction::ExtractRecurrenceResume;
return false;
}
} // end namespace vputils
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5eb99ffd1e10e..8fe9c049ca161 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -137,6 +137,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::ExtractRecurrenceResult:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
return false;
@@ -300,6 +301,8 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ComputeReductionResult:
+ case VPInstruction::ExtractRecurrenceResult:
+ case VPInstruction::ExtractRecurrenceResume:
case VPInstruction::PtrAdd:
case VPInstruction::ExplicitVectorLength:
return true;
@@ -558,6 +561,39 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
return ReducedPartRdx;
}
+ case VPInstruction::ExtractRecurrenceResult: {
+ if (Part != 0)
+ return State.get(this, 0, /*IsScalar*/ true);
+
+ // Extract the second last element in the middle block for users outside the
+ // loop.
+ Value *Res;
+ if (State.VF.isVector()) {
+ Res = State.get(
+ getOperand(0),
+ VPIteration(State.UF - 1, VPLane::getLastLaneForVF(State.VF, 2)));
+ } else {
+ assert(State.UF > 1 && "VF and UF cannot both be 1");
+ // When loop is unrolled without vectorizing, retrieve the value just
+ // prior to the final unrolled value. This is analogous to the vectorized
+ // case above: extracting the second last element when VF > 1.
+ Res = State.get(getOperand(0), State.UF - 2);
+ }
+ Res->setName(Name);
+ return Res;
+ }
+ case VPInstruction::ExtractRecurrenceResume: {
+ if (Part != 0)
+ return State.get(this, 0, /*IsScalar*/ true);
+
+ auto *PhiR = cast<VPFirstOrderRecurrencePHIRecipe>(getOperand(0));
+ VPValue *PreviousDef = PhiR->getBackedgeValue();
+ Value *Res =
+ State.get(PreviousDef, VPIteration(State.UF - 1,
+ VPLane::getLastLaneForVF(State.VF)));
+ Res->setName(Name);
+ return Res;
+ }
case VPInstruction::LogicalAnd: {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
@@ -598,6 +634,7 @@ void VPInstruction::execute(VPTransformState &State) {
bool GeneratesPerFirstLaneOnly =
canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
+ getOpcode() == VPInstruction::ExtractRecurrenceResult ||
getOpcode() == VPInstruction::ComputeReductionResult);
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
for (unsigned Part = 0; Part < State.UF; ++Part) {
@@ -692,6 +729,12 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCount:
O << "branch-on-count";
break;
+ case VPInstruction::ExtractRecurrenceResume:
+ O << "extract-recurrence-resume";
+ break;
+ case VPInstruction::ExtractRecurrenceResult:
+ O << "extract-recurrence-result";
+ break;
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 422579ea8b84f..3099bd8c620de 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -812,6 +812,8 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
RecurrencePhis.push_back(FOR);
+ VPBuilder MiddleBuilder(
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()));
for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
@@ -843,6 +845,63 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
// Set the first operand of RecurSplice to FOR again, after replacing
// all users.
RecurSplice->setOperand(0, FOR);
+
+ // This is the second phase of vectorizing first-order recurrences. An
+ // overview of the transformation is described below. Suppose we have the
+ // following loop.
+ //
+ // for (int i = 0; i < n; ++i)
+ // b[i] = a[i] - a[i - 1];
+ //
+ // There is a first-order recurrence on "a". For this loop, the shorthand
+ // scalar IR looks like:
+ //
+ // scalar.ph:
+ // s_init = a[-1]
+ // br scalar.body
+ //
+ // scalar.body:
+ // i = phi [0, scalar.ph], [i+1, scalar.body]
+ // s1 = phi [s_init, scalar.ph], [s2, scalar.body]
+ // s2 = a[i]
+ // b[i] = s2 - s1
+ // br cond, scalar.body, ...
+ //
+ // In this example, s1 is a recurrence because it's value depends on the
+ // previous iteration. In the first phase of vectorization, we created a
+ // vector phi v1 for s1. We now complete the vectorization and produce the
+ // shorthand vector IR shown below (for VF = 4, UF = 1).
+ //
+ // vector.ph:
+ // v_init = vector(..., ..., ..., a[-1])
+ // br vector.body
+ //
+ // vector.body
+ // i = phi [0, vector.ph], [i+4, vector.body]
+ // v1 = phi [v_init, vector.ph], [v2, vector.body]
+ // v2 = a[i, i+1, i+2, i+3];
+ // v3 = vector(v1(3), v2(0, 1, 2))
+ // b[i, i+1, i+2, i+3] = v2 - v3
+ // br cond, vector.body, middle.block
+ //
+ // middle.block:
+ // x = v2(3)
+ // br scalar.ph
+ //
+ // scalar.ph:
+ // s_init = phi [x, middle.block], [a[-1], otherwise]
+ // br scalar.body
+ //
+ // After execution completes the vector loop, we extract the next value of
+ // the recurrence (x) to use as the initial value in the scalar loop. This
+ // is modeled by ExtractRecurrenceResume.
+ auto *Result = cast<VPInstruction>(MiddleBuilder.createNaryOp(
+ VPInstruction::ExtractRecurrenceResult, {FOR->getBackedgeValue()}, {},
+ "vector.recur.extract.for.phi"));
+ RecurSplice->replaceUsesWithIf(
+ Result, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
+ MiddleBuilder.createNaryOp(VPInstruction::ExtractRecurrenceResume, {FOR},
+ {}, "vector.recur.extract");
}
return true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
index 33d7a3a3c8ac0..50ab61cac5b1f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
@@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index 4d9c850abdf3d..32dc2ec1d50a8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -126,9 +126,9 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -191,8 +191,8 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
@@ -332,9 +332,9 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; DEFAULT: middle.block:
+; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP15]], i32 2
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93396
More information about the llvm-commits
mailing list