[llvm] [VPlan] Expand VPWidenPointerInductionRecipe into separate recipes (PR #148274)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 11:35:08 PDT 2025
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/148274
Stacked on #148273 to be able to use VPInstruction::PtrAdd.
This is the VPWidenPointerInductionRecipe equivalent of #118638, with the motivation of allowing us to use the EVL as the induction step.
Most of the new VPlan transformation is a straightforward translation of the existing execute code.
VPUnrollPartAccessor unfortunately doesn't work outside of VPlanRecipes.cpp so here the operands are just manually checked to see if they're unrolled.
>From 8dec7cf95478273269ecbc2062b7a3254b485d5e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 12 Jul 2025 01:40:59 +0800
Subject: [PATCH 1/2] [VPlan] Allow generating vectors with
VPInstruction::ptradd. NFC
Currently a ptradd can only generate a scalar, or a series of scalars per-lane.
In an upcoming patch to expand VPWidenPointerInductionRecipe into smaller recipes, we need to be able to generate a vector ptradd, which currently we can't do.
This adds support for generating vectors by checking to see if the offset operand is a vector: If it isn't, it will generate per-lane scalars as per usual.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++++---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 +++++---------
2 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9a6e4b36397b3..0d9af0210a393 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -958,8 +958,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ExtractPenultimateElement,
LogicalAnd, // Non-poison propagating logical And.
// Add an offset in bytes (second operand) to a base pointer (first
- // operand). Only generates scalar values (either for the first lane only or
- // for all lanes, depending on its uses).
+ // operand). The base pointer must be scalar, but the offset can be a
+ // scalar, multiple scalars, or a vector. If the offset is multiple scalars
+ // then it will generate multiple scalar values (either for the first lane
+ // only or for all lanes, depending on its uses).
PtrAdd,
// Returns a scalar boolean value, which is true if any lane of its
// (boolean) vector operands is true. It produces the reduced value across
@@ -998,7 +1000,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// values per all lanes, stemming from an original ingredient. This method
/// identifies the (rare) cases of VPInstructions that do so as well, w/o an
/// underlying ingredient.
- bool doesGeneratePerAllLanes() const;
+ bool doesGeneratePerAllLanes(VPTransformState &State) const;
/// Returns true if we can generate a scalar for the first lane only if
/// needed.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 75ade13b09d9c..4b7d21edbb48a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -494,8 +494,9 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
}
#endif
-bool VPInstruction::doesGeneratePerAllLanes() const {
- return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
+bool VPInstruction::doesGeneratePerAllLanes(VPTransformState &State) const {
+ return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this) &&
+ !State.hasVectorValue(getOperand(1));
}
bool VPInstruction::canGenerateScalarForFirstLane() const {
@@ -848,10 +849,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateLogicalAnd(A, B, Name);
}
case VPInstruction::PtrAdd: {
- assert(vputils::onlyFirstLaneUsed(this) &&
- "can only generate first lane for PtrAdd");
Value *Ptr = State.get(getOperand(0), VPLane(0));
- Value *Addend = State.get(getOperand(1), VPLane(0));
+ Value *Addend = State.get(getOperand(1), vputils::onlyFirstLaneUsed(this));
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
case VPInstruction::AnyOf: {
@@ -911,9 +910,6 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
}
- assert(!doesGeneratePerAllLanes() &&
- "Should only generate a vector value or single scalar, not scalars "
- "for all lanes.");
return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind);
}
@@ -1001,7 +997,7 @@ void VPInstruction::execute(VPTransformState &State) {
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) ||
isVectorToScalar() || isSingleScalar());
- bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
+ bool GeneratesPerAllLanes = doesGeneratePerAllLanes(State);
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue();
Lane != NumLanes; ++Lane) {
>From 61e4aec21dc1e0eaee695bf9c759db9072b1488a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sat, 12 Jul 2025 02:23:24 +0800
Subject: [PATCH 2/2] [VPlan] Expand VPWidenPointerInductionRecipe into
separate recipes
Stacked on #148273 to be able to use VPInstruction::PtrAdd.
This is the VPWidenPointerInductionRecipe equivalent of #118638, with the motivation of allowing us to use the EVL as the induction step.
Most of the new VPlan transformation is a straightforward translation of the existing execute code.
VPUnrollPartAccessor unfortunately doesn't work outside of VPlanRecipes.cpp so here the operands are just manually checked to see if they're unrolled.
---
llvm/lib/Transforms/Vectorize/VPlan.cpp | 15 ---
llvm/lib/Transforms/Vectorize/VPlan.h | 16 +--
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 86 --------------
.../Transforms/Vectorize/VPlanTransforms.cpp | 107 ++++++++++++++++++
.../LoopVectorize/AArch64/sve-widen-gep.ll | 17 ++-
.../ARM/mve-reg-pressure-vmla.ll | 18 +--
.../LoopVectorize/ARM/pointer_iv.ll | 30 ++---
.../LoopVectorize/RISCV/strided-accesses.ll | 24 ++--
.../Transforms/LoopVectorize/X86/pr48340.ll | 4 +-
.../LoopVectorize/pointer-induction.ll | 4 +-
10 files changed, 156 insertions(+), 165 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 40a55656bfa7e..2ca2e273392db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1043,21 +1043,6 @@ void VPlan::execute(VPTransformState *State) {
if (isa<VPWidenPHIRecipe>(&R))
continue;
- if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
-
- Phi->setIncomingBlock(1, VectorLatchBB);
-
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
- continue;
- }
-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
// VPInstructions currently model scalar Phis only.
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0d9af0210a393..6d658287fe738 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2066,8 +2066,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
}
};
-class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
- public VPUnrollPartAccessor<4> {
+class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe {
bool IsScalarAfterVectorization;
public:
@@ -2095,19 +2094,14 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
- /// Generate vector values for the pointer induction.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("cannot execute this recipe, should be expanded via "
+ "expandVPWidenPointerInductionRecipe");
+ }
/// Returns true if only scalar values will be generated.
bool onlyScalarsGenerated(bool IsScalable);
- /// Returns the VPValue representing the value of this induction at
- /// the first unrolled part, if it exists. Returns itself if unrolling did not
- /// take place.
- VPValue *getFirstUnrolledPartOperand() {
- return getUnrollPart(*this) == 0 ? this : getOperand(3);
- }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4b7d21edbb48a..1feb45abaa193 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3686,87 +3686,6 @@ bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
(!IsScalable || vputils::onlyFirstLaneUsed(this));
}
-void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
- assert(getInductionDescriptor().getKind() ==
- InductionDescriptor::IK_PtrInduction &&
- "Not a pointer induction according to InductionDescriptor!");
- assert(State.TypeAnalysis.inferScalarType(this)->isPointerTy() &&
- "Unexpected type.");
- assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
- "Recipe should have been replaced");
-
- unsigned CurrentPart = getUnrollPart(*this);
-
- // Build a pointer phi
- Value *ScalarStartValue = getStartValue()->getLiveInIRValue();
- Type *ScStValueType = ScalarStartValue->getType();
-
- BasicBlock *VectorPH =
- State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- PHINode *NewPointerPhi = nullptr;
- if (CurrentPart == 0) {
- IRBuilder<>::InsertPointGuard Guard(State.Builder);
- if (State.Builder.GetInsertPoint() !=
- State.Builder.GetInsertBlock()->getFirstNonPHIIt())
- State.Builder.SetInsertPoint(
- State.Builder.GetInsertBlock()->getFirstNonPHIIt());
- NewPointerPhi = State.Builder.CreatePHI(ScStValueType, 2, "pointer.phi");
- NewPointerPhi->addIncoming(ScalarStartValue, VectorPH);
- NewPointerPhi->setDebugLoc(getDebugLoc());
- } else {
- // The recipe has been unrolled. In that case, fetch the single pointer phi
- // shared among all unrolled parts of the recipe.
- auto *GEP =
- cast<GetElementPtrInst>(State.get(getFirstUnrolledPartOperand()));
- NewPointerPhi = cast<PHINode>(GEP->getPointerOperand());
- }
-
- // A pointer induction, performed by using a gep
- BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
- Value *ScalarStepValue = State.get(getStepValue(), VPLane(0));
- Type *PhiType = State.TypeAnalysis.inferScalarType(getStepValue());
- Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
- // Add induction update using an incorrect block temporarily. The phi node
- // will be fixed after VPlan execution. Note that at this point the latch
- // block cannot be used, as it does not exist yet.
- // TODO: Model increment value in VPlan, by turning the recipe into a
- // multi-def and a subclass of VPHeaderPHIRecipe.
- if (CurrentPart == 0) {
- // The recipe represents the first part of the pointer induction. Create the
- // GEP to increment the phi across all unrolled parts.
- Value *NumUnrolledElems = State.get(getOperand(2), true);
-
- Value *InductionGEP = GetElementPtrInst::Create(
- State.Builder.getInt8Ty(), NewPointerPhi,
- State.Builder.CreateMul(
- ScalarStepValue,
- State.Builder.CreateTrunc(NumUnrolledElems, PhiType)),
- "ptr.ind", InductionLoc);
-
- NewPointerPhi->addIncoming(InductionGEP, VectorPH);
- }
-
- // Create actual address geps that use the pointer phi as base and a
- // vectorized version of the step value (<step*0, ..., step*N>) as offset.
- Type *VecPhiType = VectorType::get(PhiType, State.VF);
- Value *StartOffsetScalar = State.Builder.CreateMul(
- RuntimeVF, ConstantInt::get(PhiType, CurrentPart));
- Value *StartOffset =
- State.Builder.CreateVectorSplat(State.VF, StartOffsetScalar);
- // Create a vector of consecutive numbers from zero to VF.
- StartOffset = State.Builder.CreateAdd(
- StartOffset, State.Builder.CreateStepVector(VecPhiType));
-
- assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
- "scalar step must be the same across all parts");
- Value *GEP = State.Builder.CreateGEP(
- State.Builder.getInt8Ty(), NewPointerPhi,
- State.Builder.CreateMul(StartOffset, State.Builder.CreateVectorSplat(
- State.VF, ScalarStepValue)),
- "vector.gep");
- State.set(this, GEP);
-}
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -3925,11 +3844,6 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
- // Manually move it with the other PHIs in case PHI recipes above this one
- // also inserted non-phi instructions.
- // TODO: Remove once VPWidenPointerInductionRecipe is also expanded in
- // convertToConcreteRecipes.
- VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt());
State.set(this, VecPhi);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 581af67c88bf9..b96ac9f36bcd3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2675,6 +2675,107 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
WidenIVR->replaceAllUsesWith(WidePHI);
}
+/// Expand a VPWidenPointerInductionRecipe into executable recipes, for the
+/// initial value, phi and backedge value. In the following example:
+///
+/// <x1> vector loop: {
+/// vector.body:
+/// EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION %start, %step, %vf
+/// ...
+/// EMIT branch-on-count ...
+/// }
+///
+/// WIDEN-POINTER-INDUCTION will get expanded to:
+///
+/// <x1> vector loop: {
+/// vector.body:
+/// EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind
+/// EMIT %mul = mul %stepvector, %step
+/// EMIT %vector.gep = ptradd %pointer.phi, %mul
+/// ...
+/// EMIT %ptr.ind = ptradd %pointer.phi, %vf
+/// EMIT branch-on-count ...
+/// }
+static void
+expandVPWidenPointerInductionRecipe(VPWidenPointerInductionRecipe *R,
+ VPTypeAnalysis &TypeInfo) {
+ VPlan *Plan = R->getParent()->getPlan();
+
+ assert(R->getInductionDescriptor().getKind() ==
+ InductionDescriptor::IK_PtrInduction &&
+ "Not a pointer induction according to InductionDescriptor!");
+ assert(TypeInfo.inferScalarType(R)->isPointerTy() && "Unexpected type.");
+ assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) &&
+ "Recipe should have been replaced");
+
+ unsigned CurrentPart = 0;
+ if (R->getNumOperands() > 3)
+ CurrentPart =
+ cast<ConstantInt>(R->getOperand(4)->getLiveInIRValue())->getZExtValue();
+
+ VPBuilder Builder(R);
+ DebugLoc DL = R->getDebugLoc();
+
+ // Build a pointer phi
+ VPPhi *Phi;
+ if (CurrentPart == 0) {
+ Phi = Builder.createScalarPhi({R->getStartValue()}, R->getDebugLoc(),
+ "pointer.phi");
+ } else {
+ // The recipe has been unrolled. In that case, fetch the single pointer phi
+ // shared among all unrolled parts of the recipe.
+ auto *PtrAdd = cast<VPInstruction>(R->getOperand(3));
+ Phi = cast<VPPhi>(PtrAdd->getOperand(0)->getDefiningRecipe());
+ }
+
+ Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
+
+ // A pointer induction, performed by using a gep
+ Type *PhiType = TypeInfo.inferScalarType(R->getStepValue());
+ VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc(
+ &Plan->getVF(), PhiType, TypeInfo.inferScalarType(&Plan->getVF()), DL);
+ if (CurrentPart == 0) {
+ // The recipe represents the first part of the pointer induction. Create the
+ // GEP to increment the phi across all unrolled parts.
+ VPValue *NumUnrolledElems = Builder.createScalarZExtOrTrunc(
+ R->getOperand(2), PhiType, TypeInfo.inferScalarType(R->getOperand(2)),
+ DL);
+ VPValue *Offset = Builder.createNaryOp(
+ Instruction::Mul, {R->getStepValue(), NumUnrolledElems});
+
+ VPBuilder::InsertPointGuard Guard(Builder);
+ VPBasicBlock *ExitingBB =
+ Plan->getVectorLoopRegion()->getExitingBasicBlock();
+ Builder.setInsertPoint(ExitingBB,
+ ExitingBB->getTerminator()->getIterator());
+
+ VPValue *InductionGEP = Builder.createPtrAdd(Phi, Offset, DL, "ptr.ind");
+ Phi->addOperand(InductionGEP);
+ }
+
+ VPValue *CurrentPartV =
+ Plan->getOrAddLiveIn(ConstantInt::get(PhiType, CurrentPart));
+
+ // Create actual address geps that use the pointer phi as base and a
+ // vectorized version of the step value (<step*0, ..., step*N>) as offset.
+ VPValue *StartOffsetScalar =
+ Builder.createNaryOp(Instruction::Mul, {RuntimeVF, CurrentPartV});
+ VPValue *StartOffset =
+ Builder.createNaryOp(VPInstruction::Broadcast, StartOffsetScalar);
+ // Create a vector of consecutive numbers from zero to VF.
+ StartOffset = Builder.createNaryOp(
+ Instruction::Add,
+ {StartOffset,
+ Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)});
+
+ VPValue *PtrAdd = Builder.createPtrAdd(
+ Phi,
+ Builder.createNaryOp(Instruction::Mul, {StartOffset, R->getStepValue()}),
+ DL, "vector.gep");
+
+ R->replaceAllUsesWith(PtrAdd);
+}
+
void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
// Replace loop regions with explicity CFG.
SmallVector<VPRegionBlock *> LoopRegions;
@@ -2711,6 +2812,12 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
continue;
}
+ if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
+ expandVPWidenPointerInductionRecipe(WidenIVR, TypeInfo);
+ ToRemove.push_back(WidenIVR);
+ continue;
+ }
+
if (auto *Expr = dyn_cast<VPExpressionRecipe>(&R)) {
Expr->decompose();
ToRemove.push_back(Expr);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index e58ea655d6098..5aee65fd1c59d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -67,10 +67,8 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START_2]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP12]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -159,17 +157,16 @@ define void @pointer_induction(ptr noalias %start, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP6]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = mul <vscale x 2 x i64> [[TMP13]], splat (i64 1)
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x ptr> [[VECTOR_GEP]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i64> [[DOTSPLAT]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, ptr [[TMP16]], align 1
; CHECK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i8> [[WIDE_LOAD]], splat (i8 1)
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll
index 4c29a3a0d1d01..6e16003f11757 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reg-pressure-vmla.ll
@@ -29,14 +29,14 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[IN]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[OUT]], %[[VECTOR_PH]] ], [ [[PTR_IND3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[IN]], %[[VECTOR_PH]] ], [ [[PTR_IND3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[OUT]], %[[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 [[N]])
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 1
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[VECTOR_GEP]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 1
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[VECTOR_GEP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 2
; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP1]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i8> poison), !alias.scope [[META0]]
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_MASKED_GATHER]] to <4 x i32>
@@ -66,14 +66,14 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
; CHECK-NEXT: [[TMP27:%.*]] = add nuw <4 x i32> [[TMP26]], [[TMP24]]
; CHECK-NEXT: [[TMP28:%.*]] = lshr <4 x i32> [[TMP27]], splat (i32 16)
; CHECK-NEXT: [[TMP29:%.*]] = trunc <4 x i32> [[TMP28]] to <4 x i8>
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 1
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP13]], <4 x ptr> [[VECTOR_GEP4]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP4]], i32 2
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 1
+; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP13]], <4 x ptr> [[VECTOR_GEP]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
+; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP21]], <4 x ptr> [[TMP30]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP29]], <4 x ptr> [[TMP31]], i32 1, <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
; CHECK-NEXT: [[PTR_IND3]] = getelementptr i8, ptr [[POINTER_PHI2]], i32 12
+; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index fa03e29ae0620..c62dcba7c1302 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -106,8 +106,8 @@ define hidden void @pointer_phi_v4i32_add3(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 12, i32 24, i32 36>
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]]
@@ -539,8 +539,8 @@ define hidden void @pointer_phi_v4f32_add3(ptr noalias nocapture readonly %A, pt
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 12, i32 24, i32 36>
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]]
@@ -743,10 +743,10 @@ define hidden void @pointer_phi_v4i32_uf2(ptr noalias nocapture readonly %A, ptr
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
@@ -808,12 +808,12 @@ define hidden void @pointer_phi_v4i32_uf4(ptr noalias nocapture readonly %A, ptr
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 192, i32 216, i32 240, i32 264>
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 288, i32 312, i32 336, i32 360>
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 192, i32 216, i32 240, i32 264>
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 96, i32 120, i32 144, i32 168>
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 24, i32 48, i32 72>
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i32 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i32 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
@@ -880,11 +880,11 @@ define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias n
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ], [ [[X]], [[ENTRY]] ]
; CHECK-NEXT: [[POINTER_PHI5:%.*]] = phi ptr [ [[PTR_IND6:%.*]], [[VECTOR_BODY]] ], [ [[Z]], [[ENTRY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[POINTER_PHI5]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 1
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP0]], i32 1, <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 25dac366ef73e..86536b705261f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -170,10 +170,8 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP8]], 0
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -764,10 +762,8 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; STRIDED-NEXT: [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
-; STRIDED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP13]]
-; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP15]], 0
+; STRIDED-NEXT: [[TMP18:%.*]] = mul i64 [[TMP13]], 0
; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
@@ -775,23 +771,21 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
; STRIDED-NEXT: [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
; STRIDED-NEXT: [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
-; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
-; STRIDED-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; STRIDED-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 4
+; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP21]]
; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP13]]
-; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP23]], 0
+; STRIDED-NEXT: [[TMP26:%.*]] = mul i64 [[TMP13]], 0
; STRIDED-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
; STRIDED-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; STRIDED-NEXT: [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; STRIDED-NEXT: [[TMP28:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT14]], [[TMP27]]
; STRIDED-NEXT: [[TMP29:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]]
-; STRIDED-NEXT: [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP29]]
-; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]]
+; STRIDED-NEXT: [[VECTOR_GEP11:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP29]]
+; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP11]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]]
; STRIDED-NEXT: [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
-; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]]
+; STRIDED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]]
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
-; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]]
-; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]]
+; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP25]]
+; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP17]]
; STRIDED-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; STRIDED: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
index 8acf89abae2d7..6c7a882567a64 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll
@@ -27,8 +27,8 @@ define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1024, i64 2048, i64 3072>
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 12288, i64 13312, i64 14336, i64 15360>
+; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 0
; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384
@@ -85,8 +85,8 @@ define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1024, i64 2048, i64 3072>
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 12288, i64 13312, i64 14336, i64 15360>
+; CHECK-NEXT: [[VECTOR_GEP4:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], i64 0
; CHECK-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call <4 x ptr> @llvm.masked.gather.v4p0.v4p0(<4 x ptr> [[TMP8]], i32 8, <4 x i1> splat (i1 true), <4 x ptr> poison)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16384
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index a4f2b077cb066..9531802f830fa 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -317,8 +317,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; DEFAULT-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; DEFAULT: middle.block:
@@ -373,8 +373,8 @@ define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; STRIDED: middle.block:
More information about the llvm-commits mailing list