[llvm] [VPlan] Compute scalable VF in preheader for induction increment. (PR #74762)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 12:43:11 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
Changes:
UF * VF is loop invariant, so the induction increment's step can be computed once in the preheader instead of on every loop iteration.
This prepares the code for #74333 and reduces the test changes.
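For illustration, here is a minimal IR sketch of the intended effect (not taken from the patch; it assumes VF = vscale x 4 and UF = 1): the vscale-based step is hoisted from `vector.body` into `vector.ph`, mirroring the test updates below.

```llvm
; Before: the step is recomputed on every iteration.
vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; ... loop body ...
  %vscale = call i64 @llvm.vscale.i64()
  %step = mul i64 %vscale, 4
  %index.next = add nuw i64 %index, %step

; After: the step is computed once in the preheader.
vector.ph:
  %vscale = call i64 @llvm.vscale.i64()
  %step = mul i64 %vscale, 4
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; ... loop body ...
  %index.next = add nuw i64 %index, %step
```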
---
Patch is 473.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74762.diff
60 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+7-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_prefer_scalable.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+17-17)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll (+19-19)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll (+33-33)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll (+44-44)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll (+33-33)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll (+23-23)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+114-114)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll (+20-20)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll (+54-54)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll (+9-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll (+18-18)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+35-35)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll (+22-22)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll (+6-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll (+50-50)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll (+13-13)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll (+20-20)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vfabi.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll (+18-18)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/uniform-args-call-variants.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/wider-VF-for-callinst.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll (+19-19)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll (+99-99)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll (+7-7)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll (+14-14)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll (+22-22)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll (+33-33)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll (+60-60)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+30-30)
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_scalable.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-inductions.ll (+65-65)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll (+14-14)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-loop-unpredicated-body-scalar-tail.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-trunc-min-bitwidth.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1891b211a3566..a405644035993 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -340,8 +340,13 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
auto *Phi = State.get(getOperand(0), 0);
// The loop step is equal to the vectorization factor (num of SIMD
// elements) times the unroll factor (num of SIMD instructions).
- Value *Step =
- createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
+ Value *Step;
+ {
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(VectorPH->getTerminator());
+ Step = createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
+ }
return Builder.CreateAdd(Phi, Step, Name, hasNoUnsignedWrap(),
hasNoSignedWrap());
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
index 6b5d69d100dde..8c50d86489c9d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -18,16 +18,16 @@ define void @f1(ptr %A) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP8]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
index 98d55ae15c077..763b3e0bc8293 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll
@@ -54,6 +54,8 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; SVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; SVE-NEXT: br label [[VECTOR_BODY:%.*]]
; SVE: vector.body:
; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -66,8 +68,6 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]]
; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
; SVE-NEXT: [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
-; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
index 28962dfba8924..8e2efd86c1f26 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll
@@ -19,6 +19,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -27,8 +29,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_LOAD]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP5]], ptr [[TMP6]], align 8
-; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -61,6 +61,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
+; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -70,8 +72,6 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFCOMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFCOMMON-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -111,6 +111,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -122,8 +124,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> [[TMP6]]
; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8
-; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -162,6 +162,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
+; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -177,8 +179,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFCOMMON-NEXT: [[TMP12:%.*]] = or <vscale x 2 x i1> [[TMP7]], [[TMP10]]
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP11]], i32 8, <vscale x 2 x i1> [[TMP12]])
-; TFCOMMON-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP14]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP15:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -229,6 +229,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -241,8 +243,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]]
; TFNONE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP9]], align 8
-; TFNONE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -284,6 +284,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFCOMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
+; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2
; TFCOMMON-NEXT: br label [[VECTOR_BODY:%.*]]
; TFCOMMON: vector.body:
; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -300,8 +302,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFCOMMON-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; TFCOMMON-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i1> [[TMP8]], [[TMP10]]
; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr [[TMP12]], i32 8, <vscale x 2 x i1> [[TMP13]])
-; TFCOMMON-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; TFCOMMON-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]]
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFCOMMON-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -355,6 +355,8 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -363,8 +365,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP5]], ptr [[TMP6]], align 8
-; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -413,6 +413,8 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; TFFALLBACK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; TFFALLBACK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]]
; TFFALLBACK: vector.body:
; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -421,8 +423,6 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
; TFFALLBACK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP5]], ptr [[TMP6]], align 8
-; TFFALLBACK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TFFALLBACK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -475,6 +475,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]]
; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
+; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]]
; TFNONE: vector.body:
; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -483,8 +485,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @foo_vector_nomask(<vscale x 2 x i64> [[WIDE_LOAD]])
; TFNONE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP5]], ptr [[TMP6]], align 8
-; TFNONE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TFNONE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -517,6 +517,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFALWAYS-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; TFALWAYS-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025)
+; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFALWAYS-NEXT: br label [[VECTOR_BODY:%.*]]
; TFALWAYS: vector.body:
; TFALWAYS-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -526,8 +528,6 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFALWAYS-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFALWAYS-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; TFALWAYS-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; TFALWAYS-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; TFALWAYS-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025)
; TFALWAYS-NEXT: [[TMP10:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
@@ -547,6 +547,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/74762
More information about the llvm-commits mailing list