[llvm] [VPlan] Replicate VPScalarIVStepsRecipe by VF outside replicate regions. (PR #170053)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 30 13:55:55 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Florian Hahn (fhahn)
Changes:
Extend replicateByVF to also handle VPScalarIVStepsRecipe. To do so, the
patch adds a new lane operand to VPScalarIVStepsRecipe, which is only
added when replicating. This makes it possible to remove a number of
redundant lane-0 computations. The lane operand will also be used when
explicitly replicating replicate regions in a follow-up.
Depends on https://github.com/llvm/llvm-project/pull/169796 (included in
PR).
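To illustrate the lane-0 cleanup this enables, here is a minimal before/after sketch distilled from the test updates below (value names are illustrative, adapted from gather-do-not-vectorize-addressing.ll): once the lane operand is known to be 0, the scalar step for lane 0 is just the induction value itself, so the explicit add of 0 folds away.

```llvm
; Before: a scalar step is materialized for every lane, including lane 0.
%0 = add i64 %index, 0
%1 = add i64 %index, 1
%gep0 = getelementptr inbounds i32, ptr %offset, i64 %0
%gep1 = getelementptr inbounds i32, ptr %offset, i64 %1

; After: lane 0 uses %index directly, and the add-of-0 disappears.
%1 = add i64 %index, 1
%gep0 = getelementptr inbounds i32, ptr %offset, i64 %index
%gep1 = getelementptr inbounds i32, ptr %offset, i64 %1
```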
---
Patch is 646.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170053.diff
77 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+5-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+13-23)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+19-9)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+28-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll (+14-15)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll (+13-14)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll (+11-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll (+141-144)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+141-144)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+66-111)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll (+40-42)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll (+6-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll (+37-39)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+3-6)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (-3)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll (+2-5)
- (modified) llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+5-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll (+3-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll (+21-17)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll (+69-72)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+51-53)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr36524.ll (-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr72969.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll (+12-21)
- (modified) llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll (+28-34)
- (modified) llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll (-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll (+13-23)
- (modified) llvm/test/Transforms/LoopVectorize/cse-casts.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/debugloc.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll (+30-18)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+28-24)
- (modified) llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll (+84-76)
- (modified) llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll (+53-60)
- (modified) llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+22-30)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+3-2)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll (+5-6)
- (modified) llvm/test/Transforms/LoopVectorize/iv_outside_user.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/loop-form.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll (+2-8)
- (modified) llvm/test/Transforms/LoopVectorize/operand-bundles.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction.ll (+2-11)
- (modified) llvm/test/Transforms/LoopVectorize/predicate-switch.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop.ll (+24-14)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/uniform-blend.ll (+3-4)
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll (+175-188)
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll (+100-107)
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll (+40-41)
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll (+242-256)
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll (+504-528)
- (modified) llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll (+4-3)
- (modified) llvm/test/Transforms/LoopVectorize/version-mem-access.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-predicate-switch.ll (+3-5)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6ca750fc53279..294af92ee2496 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3788,7 +3788,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
- public VPUnrollPartAccessor<3> {
+ public VPUnrollPartAccessor<4> {
Instruction::BinaryOps InductionOpcode;
public:
@@ -3812,10 +3812,13 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
~VPScalarIVStepsRecipe() override = default;
VPScalarIVStepsRecipe *clone() override {
- return new VPScalarIVStepsRecipe(
+ auto *NewR = new VPScalarIVStepsRecipe(
getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags(),
getDebugLoc());
+ if (getNumOperands() == 4)
+ NewR->addOperand(getOperand(3));
+ return NewR;
}
/// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0baf7172e4443..601c9c9063ea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2367,22 +2367,22 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// Compute the scalar steps and save the results in State.
Type *IntStepTy =
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
- Type *VecIVTy = nullptr;
- Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
- if (!FirstLaneOnly && State.VF.isScalable()) {
- VecIVTy = VectorType::get(BaseIVTy, State.VF);
- UnitStepVec =
- Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
- SplatStep = Builder.CreateVectorSplat(State.VF, Step);
- SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
- }
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
if (State.Lane) {
StartLane = State.Lane->getKnownLane();
EndLane = StartLane + 1;
+ } else if (getNumOperands() == 5) {
+ // Operand 3 is the Lane operand (when present after replicating by VF).
+ VPValue *Op3 = getOperand(3);
+ assert(Op3->isLiveIn() && "lane operand must be a live-in");
+ auto *C = cast<ConstantInt>(Op3->getLiveInIRValue());
+ unsigned Val = C->getZExtValue();
+ StartLane = Val;
+ EndLane = Val + 1;
}
+
Value *StartIdx0;
if (getUnrollPart(*this) == 0)
StartIdx0 = ConstantInt::get(IntStepTy, 0);
@@ -2396,19 +2396,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
}
- if (!FirstLaneOnly && State.VF.isScalable()) {
- auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
- auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
- if (BaseIVTy->isFloatingPointTy())
- InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
- auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
- auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
- State.set(this, Add);
- // It's useful to record the lane values too for the known minimum number
- // of elements so we do those below. This improves the code quality when
- // trying to extract the first element, for example.
- }
-
if (BaseIVTy->isFloatingPointTy())
StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
@@ -2422,7 +2409,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
"scalable");
auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
- State.set(this, Add, VPLane(Lane));
+ if (State.Lane)
+ State.set(this, Add, VPLane(Lane));
+ else
+ State.set(this, Add, VPLane(0));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b12f8ccc73c7e..dcef3f54d5c2a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -793,12 +793,17 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
WideIV->getDebugLoc(), Builder);
// Update scalar users of IV to use Step instead.
- if (!HasOnlyVectorVFs)
+ if (!HasOnlyVectorVFs) {
WideIV->replaceAllUsesWith(Steps);
- else
- WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
- return U.usesScalars(WideIV);
- });
+ } else {
+ bool HasScalableVF = Plan.hasScalableVF();
+ WideIV->replaceUsesWithIf(Steps,
+ [WideIV, HasScalableVF](VPUser &U, unsigned) {
+ if (HasScalableVF)
+ return U.usesFirstLaneOnly(WideIV);
+ return U.usesScalars(WideIV);
+ });
+ }
}
}
@@ -1432,9 +1437,14 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
}
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
- // the first lane is demanded.
+ // the first lane is demanded and both Lane and UnrollPart operands are 0.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
- if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
+ bool LaneIsZero = Steps->getNumOperands() >= 4 &&
+ match(Steps->getOperand(3), m_ZeroInt());
+ bool PartIsZero =
+ Steps->getNumOperands() < 5 || match(Steps->getOperand(4), m_ZeroInt());
+ if (Steps->isPart0() && LaneIsZero && PartIsZero &&
+ vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
}
@@ -4306,9 +4316,9 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (!isa<VPReplicateRecipe, VPInstruction>(&R))
+ if (!isa<VPScalarIVStepsRecipe, VPReplicateRecipe, VPInstruction>(&R))
continue;
- auto *DefR = cast<VPRecipeWithIRFlags>(&R);
+ auto *DefR = cast<VPSingleDefRecipe>(&R);
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index f215476b1e163..003686490d42c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -137,6 +137,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
+ ScalarIVSteps->addOperand(getConstantInt(0));
ScalarIVSteps->addOperand(getConstantInt(Part));
}
@@ -526,9 +527,21 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
/*IsSingleScalar=*/true, /*Mask=*/nullptr,
*RepR, *RepR, RepR->getDebugLoc());
} else {
- assert(isa<VPInstruction>(DefR) &&
+ assert((isa<VPInstruction, VPScalarIVStepsRecipe>(DefR)) &&
"DefR must be a VPReplicateRecipe or VPInstruction");
New = DefR->clone();
+ if (isa<VPScalarIVStepsRecipe>(New)) {
+ // Add or update lane operand for VPScalarIVStepsRecipe.
+ if (NewOps.size() == 3) {
+ NewOps.push_back(Plan.getConstantInt(IdxTy, 0));
+ New->addOperand(NewOps.back());
+ }
+ NewOps.push_back(Plan.getConstantInt(IdxTy, Lane.getKnownLane()));
+ New->addOperand(NewOps.back());
+ if (NewOps.size() == 5)
+ std::swap(NewOps[3], NewOps[4]);
+ }
+
for (const auto &[Idx, Op] : enumerate(NewOps)) {
New->setOperand(Idx, Op);
}
@@ -558,7 +571,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
+ if (!isa<VPInstruction, VPReplicateRecipe, VPScalarIVStepsRecipe>(&R) ||
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
@@ -566,6 +579,19 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
continue;
+ if (isa<VPScalarIVStepsRecipe>(&R) && Plan.hasScalarVFOnly()) {
+ // Add lane operand to VPScalarIVStepsRecipe only when the plan is
+ // scalar.
+ if (R.getNumOperands() == 4) {
+ R.addOperand(R.getOperand(3));
+ R.setOperand(3, Plan.getConstantInt(IdxTy, 0));
+ } else {
+ R.addOperand(Plan.getConstantInt(IdxTy, 0));
+ R.addOperand(Plan.getConstantInt(IdxTy, 0));
+ }
+ continue;
+ }
+
auto *DefR = cast<VPSingleDefRecipe>(&R);
VPBuilder Builder(DefR);
if (DefR->getNumUsers() == 0) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
index 14f5dd7d41691..46fc9646356c8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll
@@ -16,11 +16,10 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
@@ -28,31 +27,31 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
-; CHECK: [[PRED_STORE_IF5]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
+; CHECK: [[PRED_STORE_IF4]]:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP9]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
-; CHECK: [[PRED_STORE_CONTINUE6]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
+; CHECK: [[PRED_STORE_CONTINUE5]]:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
-; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
-; CHECK: [[PRED_STORE_IF7]]:
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]]
+; CHECK: [[PRED_STORE_IF6]]:
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP3]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP11]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE8]]
-; CHECK: [[PRED_STORE_CONTINUE8]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
+; CHECK: [[PRED_STORE_CONTINUE7]]:
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
-; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
-; CHECK: [[PRED_STORE_IF9]]:
+; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
+; CHECK: [[PRED_STORE_IF8]]:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP4]], i64 1
; CHECK-NEXT: store i8 0, ptr [[TMP13]], align 1
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE10]]
-; CHECK: [[PRED_STORE_CONTINUE10]]:
+; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
+; CHECK: [[PRED_STORE_CONTINUE9]]:
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 26a9545764091..33be739be4718 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -20,10 +20,9 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
index 2557ae55d2c85..b396d584a8497 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll
@@ -21,31 +21,30 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
-; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP11]], align 4, !alias.scope [[META0]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP13:%.*]] = load i24, ptr [[TMP12]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i24, ptr [[TMP8]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP15:%.*]] = load i24, ptr [[TMP9]], align 4, !alias.scope [[META0]]
+; CHECK-NEXT: [[TMP16:%.*]] = load i24, ptr [[TMP10]], align 4, !alias.scope [[META0]]
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i24> poison, i24 [[TMP13]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i24> [[TMP17]], i24 [[TMP14]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i24> [[TMP18]], i24 [[TMP15]], i32 2
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i24> [[TMP19]], i24 [[TMP16]], i32 3
; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i24> [[TMP20]] to <4 x i32>
; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
index 5b4bb70e6a479..2108c15b7f838 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll
@@ -174,11 +174,10 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
-; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 2
+; ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/170053
More information about the llvm-commits mailing list