[llvm] [LV] Pass symbolic VF to CalculateTripCountMinusVF and CanonicalIVIncrementForPart (NFC) (PR #180542)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 9 07:25:00 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Benjamin Maxwell (MacDue)
<details>
<summary>Changes</summary>
This makes it easier to update the runtime VF per VPlan.
---
Patch is 221.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180542.diff
27 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+7)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+1-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanHelpers.h (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+5-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+8-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+5-11)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+4-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+30-50)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll (+6-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+3-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+3-7)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+213-248)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll (+3-9)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+1-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+88-96)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+1-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll (+1-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+6-18)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+14-32)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+10-28)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll (+6-14)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll (+9-11)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+1-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll (+54-68)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll (+1-3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b5978c670dd94..4c5defcc2aac6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -788,6 +788,13 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
return B.CreateElementCount(Ty, VFxStep);
}
+Value *createStep(IRBuilderBase &B, Type *Ty, Value *StepSize, int64_t Step) {
+ if (Step == 1)
+ return B.CreateZExtOrTrunc(StepSize, Ty);
+ return B.CreateMul(ConstantInt::get(Ty, Step),
+ B.CreateZExtOrTrunc(StepSize, Ty));
+}
+
/// Return the runtime value for VF.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
return B.CreateElementCount(Ty, VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index eac5b58841e80..460f3357f2ce4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1153,7 +1153,7 @@ class VPIRMetadata {
/// predication.
class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
public VPIRMetadata,
- public VPUnrollPartAccessor<1> {
+ public VPUnrollPartAccessor<2> {
friend class VPlanSlp;
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index bab7e25cbf407..231043a2922fe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -50,6 +50,9 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
+/// Return a Step multiplied by a runtime StepSize.
+Value *createStep(IRBuilderBase &B, Type *Ty, Value *StepSize, int64_t Step);
+
/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
/// For integer induction, returns StartValue + Index * StepValue.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f6bafae3e2acb..3aed9fa43d8ae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -449,7 +449,6 @@ unsigned VPInstruction::getNumOperandsForOpcode() const {
case Instruction::Load:
case VPInstruction::BranchOnCond:
case VPInstruction::Broadcast:
- case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ExtractLastLane:
case VPInstruction::ExtractLastPart:
@@ -471,6 +470,7 @@ unsigned VPInstruction::getNumOperandsForOpcode() const {
case VPInstruction::PtrAdd:
case VPInstruction::WidePtrAdd:
case VPInstruction::WideIVStep:
+ case VPInstruction::CalculateTripCountMinusVF:
return 2;
case Instruction::Select:
case VPInstruction::ActiveLaneMask:
@@ -628,7 +628,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
case VPInstruction::CalculateTripCountMinusVF: {
unsigned UF = getParent()->getPlan()->getUF();
Value *ScalarTC = State.get(getOperand(0), VPLane(0));
- Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
+ Value *VF = State.get(getOperand(1), VPLane(0));
+ Value *Step = createStep(Builder, ScalarTC->getType(), VF, UF);
Value *Sub = Builder.CreateSub(ScalarTC, Step);
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
Value *Zero = ConstantInt::getNullValue(ScalarTC->getType());
@@ -653,10 +654,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
case VPInstruction::CanonicalIVIncrementForPart: {
unsigned Part = getUnrollPart(*this);
auto *IV = State.get(getOperand(0), VPLane(0));
+ auto *VF = State.get(getOperand(1), VPLane(0));
assert(Part != 0 && "Must have a positive part");
// The canonical IV is incremented by the vectorization factor (num of
// SIMD elements) times the unroll part.
- Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
+ Value *Step = createStep(Builder, IV->getType(), VF, Part);
return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
hasNoSignedWrap());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 839b4cdf4219c..7a36209cb912f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1978,7 +1978,7 @@ static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF,
uint64_t Part;
if (match(Index,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
- m_VPValue(), m_ConstantInt(Part))))
+ m_VPValue(), m_VPValue(), m_ConstantInt(Part))))
Phis[Part] = Phi;
else
// Anything other than a CanonicalIVIncrementForPart is part 0
@@ -2824,6 +2824,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// Create the ActiveLaneMask instruction using the correct start values.
VPValue *TC = Plan.getTripCount();
+ VPValue *VF = &Plan.getVF();
VPValue *TripCount, *IncrementValue;
if (!DataAndControlFlowWithoutRuntimeCheck) {
@@ -2838,11 +2839,11 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// done after the active.lane.mask intrinsic is called.
IncrementValue = CanonicalIVPHI;
TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
- {TC}, DL);
+ {TC, VF}, DL);
}
auto *EntryIncrement = Builder.createOverflowingOp(
- VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,
- "index.part.next");
+ VPInstruction::CanonicalIVIncrementForPart, {StartV, VF}, {false, false},
+ DL, "index.part.next");
// Create the active lane mask instruction in the VPlan preheader.
VPValue *ALMMultiplier =
@@ -2861,9 +2862,9 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// original terminator.
VPRecipeBase *OriginalTerminator = EB->getTerminator();
Builder.setInsertPoint(OriginalTerminator);
- auto *InLoopIncrement =
- Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
- {IncrementValue}, {false, false}, DL);
+ auto *InLoopIncrement = Builder.createOverflowingOp(
+ VPInstruction::CanonicalIVIncrementForPart,
+ {IncrementValue, &Plan.getVF()}, {false, false}, DL);
auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 53cac9fcd80d6..09481b8cf6d42 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -467,7 +467,7 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
auto *VPI = dyn_cast<VPInstruction>(&R);
if (VPI &&
VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
- VPI->getNumOperands() == 1) {
+ VPI->getNumOperands() == 2) {
VPI->replaceAllUsesWith(VPI->getOperand(0));
VPI->eraseFromParent();
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 4f441e6834fc7..457731a50c768 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -509,13 +509,11 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]]
; PRED-NEXT: br i1 [[CONFLICT_RDX27]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
-; PRED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
+; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; PRED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
; PRED-NEXT: [[L_C:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META3:![0-9]+]]
; PRED-NEXT: [[L_B:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META6:![0-9]+]]
; PRED-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
-; PRED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2
; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP0]], [[TMP9]]
; PRED-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], [[TMP9]]
; PRED-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
@@ -536,7 +534,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED-NEXT: [[GEP_D:%.*]] = getelementptr i32, ptr [[D]], i64 [[IV]]
; PRED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT31]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> [[TMP15]]), !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr align 4 [[GEP_D]], <vscale x 4 x i1> [[TMP15]]), !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP4]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP9]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[IV]], i64 [[TMP12]])
; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; PRED-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true
@@ -621,8 +619,6 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; PRED-NEXT: [[ENTRY:.*:]]
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
-; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
@@ -641,7 +637,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP13:%.*]] = uitofp <vscale x 4 x i16> [[BROADCAST_SPLAT]] to <vscale x 4 x double>
; PRED-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[TMP13]], ptr align 8 [[NEXT_GEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]])
; PRED-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true
@@ -1326,8 +1322,6 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; PRED-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
-; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]]
@@ -1360,7 +1354,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; PRED-NEXT: [[TMP26:%.*]] = fptoui <vscale x 16 x float> [[TMP25]] to <vscale x 16 x i8>
; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[C]], i64 [[INDEX]]
; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP26]], ptr align 1 [[TMP27]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
+; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]])
; PRED-NEXT: [[TMP28:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
index 5d550dc07ce4b..1d22f63bbdf03 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -105,8 +105,6 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]]
@@ -116,7 +114,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -137,7 +135,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> zeroinitializer, ptr align 8 [[TMP34]], <vscale x 2 x i1> [[TMP23]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true
@@ -217,8 +215,6 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]]
@@ -228,7 +224,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
@@ -252,7 +248,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP37:%.*]] = ashr i64 [[TMP36]], 32
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP37]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr align 4 [[TMP38]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index aa6e4df26d71b..fc5fe0042c82a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -133,10 +133,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
-; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4
-; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP0]], [[TMP7]]
-; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], [[TMP7]]
+; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP0]], [[TMP5]]
+; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], [[TMP5]]
; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP0]])
; PRED-NEXT: [[TMP11:%.*]] = trunc <vscale x 16 x i32> [[BROADCAST_SPLAT]] to <vscale x 16 x i16>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 00c7e6eecfb2c..df622157af8d2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -72,30 +72,26 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP3]], 1
-; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP9]], 1
-; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP3]], 1
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
-; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
+; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/180542
More information about the llvm-commits mailing list