[llvm] [VPlan] Introduce VPInstruction::InsertLastLane for start value of VPFirstOrderRecurrencePHIRecipe. (PR #171590)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 10 02:16:48 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Mel Chen (Mel-Chen)
<details>
<summary>Changes</summary>
Currently, VPFirstOrderRecurrencePHIRecipe only turns the start value from the preheader into the vector [poison, ..., poison, start] during ::execute, when generating the PHI node. This patch represents that behavior directly in the VPlan with VPInstruction::InsertLastLane, instead of hiding it in ::execute. This allows redundant VPInstruction::InsertLastLane recipes to be eliminated via VPlanTransforms::cse.
---
Patch is 61.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171590.diff
18 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+17-19)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+20-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+14-22)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+18-9)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+14-8)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+2-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index afb654ed882f4..20ca30b029bb9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1082,6 +1082,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
// part if it is scalar. In the latter case, the recipe will be removed
// during unrolling.
ExtractPenultimateElement,
+ // Inserts the second operand into the last lane of the first operand.
+ InsertLastLane,
LogicalAnd, // Non-poison propagating logical And.
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index c64b97579881a..f83af484dee9b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -77,6 +77,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case Instruction::Freeze:
case VPInstruction::ReductionStartVector:
case VPInstruction::ResumeForEpilogue:
+ case VPInstruction::InsertLastLane:
return inferScalarType(R->getOperand(0));
case Instruction::Select: {
Type *ResTy = inferScalarType(R->getOperand(1));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b0c8564ad231a..08d078c56a4aa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -456,6 +456,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
+ case VPInstruction::InsertLastLane:
case VPInstruction::WidePtrAdd:
case VPInstruction::WideIVStep:
return 2;
@@ -702,6 +703,15 @@ Value *VPInstruction::generate(VPTransformState &State) {
Builder.getInt32(Idx));
return Res;
}
+ case VPInstruction::InsertLastLane: {
+ if (State.VF.isScalar())
+ return State.get(getOperand(1), true);
+ Value *Vec = State.get(getOperand(0));
+ Value *Elt = State.get(getOperand(1), /*IsScalar=*/true);
+ Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+ Value *LastIdx = Builder.CreateSub(RuntimeVF, Builder.getInt32(1));
+ return Builder.CreateInsertElement(Vec, Elt, LastIdx);
+ }
case VPInstruction::ReductionStartVector: {
if (State.VF.isScalar())
return State.get(getOperand(0), true);
@@ -1193,6 +1203,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractLastPart:
case VPInstruction::ExtractPenultimateElement:
+ case VPInstruction::InsertLastLane:
case VPInstruction::ActiveLaneMask:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::FirstActiveLane:
@@ -1222,6 +1233,7 @@ bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
default:
return false;
case Instruction::ExtractElement:
+ case VPInstruction::InsertLastLane:
return Op == getOperand(1);
case Instruction::PHI:
return true;
@@ -1337,6 +1349,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
case VPInstruction::BuildVector:
O << "buildvector";
break;
+ case VPInstruction::InsertLastLane:
+ O << "insert-last-lane";
+ break;
case VPInstruction::ExtractLane:
O << "extract-lane";
break;
@@ -4304,27 +4319,10 @@ void VPWidenCanonicalIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
#endif
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- // Create a vector from the initial value.
- auto *VectorInit = getStartValue()->getLiveInIRValue();
-
- Type *VecTy = State.VF.isScalar()
- ? VectorInit->getType()
- : VectorType::get(VectorInit->getType(), State.VF);
-
+ Value *VectorInit = State.get(getStartValue(), State.VF.isScalar());
+ Type *VecTy = VectorInit->getType();
BasicBlock *VectorPH =
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- if (State.VF.isVector()) {
- auto *IdxTy = Builder.getInt32Ty();
- auto *One = ConstantInt::get(IdxTy, 1);
- IRBuilder<>::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(VectorPH->getTerminator());
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
- auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- VectorInit = Builder.CreateInsertElement(
- PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
- }
-
// Create a phi node for the new recurrence.
PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 852196e589c59..c1fdda94aae4a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1443,8 +1443,16 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
return;
}
+ if (match(Def, m_VPInstruction<VPInstruction::InsertLastLane>(
+ m_VPValue(), m_VPValue(A))))
+ if (Plan->hasScalarVFOnly())
+ return Def->replaceAllUsesWith(A);
+
if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
- if (Phi->getOperand(0) == Phi->getOperand(1))
+ if (Phi->getOperand(0) == Phi->getOperand(1) ||
+ match(Phi->getOperand(0),
+ m_VPInstruction<VPInstruction::InsertLastLane>(
+ m_VPValue(), m_Specific(Phi->getOperand(1)))))
Phi->replaceAllUsesWith(Phi->getOperand(0));
return;
}
@@ -2238,6 +2246,8 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
VPBuilder &LoopBuilder) {
VPDominatorTree VPDT(Plan);
+ VPTypeAnalysis TypeInfo(Plan);
+ VPBuilder PHBuilder(Plan.getVectorPreheader());
SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
for (VPRecipeBase &R :
@@ -2246,6 +2256,15 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
RecurrencePhis.push_back(FOR);
for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
+ /// Adjust start value of fixed-order recurrence phi to [poison, ... ,
+ /// poison, start value].
+ VPValue *StartV = FOR->getStartValue();
+ VPValue *NewStart = PHBuilder.createNaryOp(
+ VPInstruction::InsertLastLane, {Plan.getOrAddLiveIn(PoisonValue::get(
+ TypeInfo.inferScalarType(StartV))),
+ StartV});
+ FOR->setOperand(0, NewStart);
+
SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
// Fixed-order recurrences do not contain cycles, so this loop is guaranteed
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
index 16e9d410e4aa7..6ddb23079a287 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -69,7 +69,7 @@ for.body:
define void @PR34711(ptr %a, ptr %b, ptr %c, i64 %n) #0 {
; CHECK-VF4UF1-LABEL: @PR34711
; CHECK-VF4UF1: vector.body
-; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
+; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %[[VEC_RECUR_INIT:.*]], %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
; CHECK-VF4UF1: %[[MGATHER]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> {{.*}}, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison)
; CHECK-VF4UF1-NEXT: %[[SPLICE:.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> %[[VEC_RECUR]], <vscale x 4 x i16> %[[MGATHER]], i32 -1)
; CHECK-VF4UF1-NEXT: %[[SXT1:.*]] = sext <vscale x 4 x i16> %[[SPLICE]] to <vscale x 4 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index f2c0ca30a6c18..c5d389fa19e2e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -61,6 +61,10 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
+; VSCALEFORTUNING2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 4
+; VSCALEFORTUNING2-NEXT: [[TMP17:%.*]] = sub i32 [[TMP6]], 1
+; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP17]]
; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = add i64 [[Y]], 1
; VSCALEFORTUNING2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP7]]
; VSCALEFORTUNING2-NEXT: [[TMP9:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
@@ -73,19 +77,11 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i32> [[TMP13]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext <vscale x 4 x i32> [[TMP14]] to <vscale x 4 x i64>
; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP15]]
-; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
-; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP18]], 4
-; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1
-; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP20]]
-; VSCALEFORTUNING2-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
-; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 4
-; VSCALEFORTUNING2-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1
-; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP23]]
; VSCALEFORTUNING2-NEXT: br label %[[VECTOR_BODY:.*]]
; VSCALEFORTUNING2: [[VECTOR_BODY]]:
; VSCALEFORTUNING2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ]
-; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
+; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[TMP18]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ]
+; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[TMP18]], %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP47:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP8]], align 4
@@ -133,13 +129,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VSCALEFORTUNING2: [[SCALAR_PH]]:
; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP50]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: br label %[[LOOP:.*]]
; VSCALEFORTUNING2: [[LOOP]]:
; VSCALEFORTUNING2-NEXT: [[TMP54:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP57:%.*]], %[[LOOP]] ]
-; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT11]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ]
+; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT10]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[TMP56:%.*]] = add i64 [[Y]], 1
@@ -182,6 +178,10 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; PRED-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; PRED-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4
+; PRED-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], 1
+; PRED-NEXT: [[TMP21:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP5]]
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP0]], [[TMP7]]
@@ -200,20 +200,12 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i32> [[TMP17]], splat (i32 1)
; PRED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[TMP18]] to <vscale x 4 x i64>
; PRED-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP19]]
-; PRED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
-; PRED-NEXT: [[TMP23:%.*]] = mul nuw i32 [[TMP22]], 4
-; PRED-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
-; PRED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP24]]
-; PRED-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32()
-; PRED-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP25]], 4
-; PRED-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 1
-; PRED-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP27]]
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ]
-; PRED-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
+; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[TMP21]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ]
+; PRED-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[TMP21]], %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP41:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP12]], align 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP28]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 8935010e71676..d59baf2169794 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1272,15 +1272,15 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP10]]
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]]
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP14]], splat (i64 1)
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2
-; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1
-; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP34]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index e35db479dc963..d61b1d5a9843d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -12,17 +12,17 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 0, i32 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 2 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[VP_OP_LOAD:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 23, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP7]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AV...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/171590
More information about the llvm-commits
mailing list