[llvm] [VPlan] Also duplicated scalar-steps when it enables sinking scalars. (PR #136021)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 13:28:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Extend sinking logic to duplicate scalar steps recipe if it enables sinking, that is if all users in a destination block require all lanes.
This should be the last step before removing legacy sinkScalarOperands.
---
Patch is 106.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136021.diff
18 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+10-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll (+19-19)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+10-7)
- (modified) llvm/test/Transforms/LoopVectorize/debugloc.ll (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll (+15-15)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/if-pred-stores.ll (+50-50)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+56-56)
- (modified) llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll (+46-46)
- (modified) llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll (+44-44)
- (modified) llvm/test/Transforms/LoopVectorize/loop-form.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/struct-return.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+2-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+2-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d0cb1c3ac590f..c89a07e11c5cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -164,7 +164,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
return true;
NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
// We only know how to duplicate VPRecipeRecipes for now.
- return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
+ return NeedsDuplicating &&
+ isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
};
if (!all_of(SinkCandidate->users(), CanSinkWithUser))
continue;
@@ -172,9 +173,14 @@ static bool sinkScalarOperands(VPlan &Plan) {
if (NeedsDuplicating) {
if (ScalarVFOnly)
continue;
- Instruction *I = SinkCandidate->getUnderlyingInstr();
- auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
- // TODO: add ".cloned" suffix to name of Clone's VPValue.
+ VPSingleDefRecipe *Clone;
+ if (isa<VPReplicateRecipe>(SinkCandidate)) {
+ Instruction *I = SinkCandidate->getUnderlyingInstr();
+ Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
+ // TODO: add ".cloned" suffix to name of Clone's VPValue.
+ } else {
+ Clone = SinkCandidate->clone();
+ }
Clone->insertBefore(SinkCandidate);
SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
index e302bf195ef8e..3c8bbaa46f275 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll
@@ -211,8 +211,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
-; CHECK-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
@@ -224,7 +223,8 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[IV]]
+; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[IV]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP72]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 0
; CHECK-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
@@ -232,7 +232,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; CHECK: [[PRED_STORE_IF1]]:
-; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1
; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
@@ -241,7 +241,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; CHECK: [[PRED_STORE_IF3]]:
-; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[IV]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2
; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
@@ -250,7 +250,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; CHECK: [[PRED_STORE_IF5]]:
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[IV]], 3
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3
; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP21]], align 1
@@ -259,7 +259,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP7]], i32 4
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; CHECK: [[PRED_STORE_IF7]]:
-; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[IV]], 4
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP24]]
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4
; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP25]], align 1
@@ -268,7 +268,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP7]], i32 5
; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; CHECK: [[PRED_STORE_IF9]]:
-; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
+; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[IV]], 5
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP28]]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5
; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP29]], align 1
@@ -277,7 +277,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP7]], i32 6
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; CHECK: [[PRED_STORE_IF11]]:
-; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
+; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[IV]], 6
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6
; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP33]], align 1
@@ -286,7 +286,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP7]], i32 7
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; CHECK: [[PRED_STORE_IF13]]:
-; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
+; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[IV]], 7
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7
; CHECK-NEXT: store i8 [[TMP38]], ptr [[TMP37]], align 1
@@ -295,7 +295,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP7]], i32 8
; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
; CHECK: [[PRED_STORE_IF15]]:
-; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
+; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[IV]], 8
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP40]]
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8
; CHECK-NEXT: store i8 [[TMP42]], ptr [[TMP41]], align 1
@@ -304,7 +304,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP7]], i32 9
; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
; CHECK: [[PRED_STORE_IF17]]:
-; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
+; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[IV]], 9
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9
; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP45]], align 1
@@ -313,7 +313,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP7]], i32 10
; CHECK-NEXT: br i1 [[TMP47]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
; CHECK: [[PRED_STORE_IF19]]:
-; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
+; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[IV]], 10
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP48]]
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10
; CHECK-NEXT: store i8 [[TMP50]], ptr [[TMP49]], align 1
@@ -322,7 +322,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP7]], i32 11
; CHECK-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
; CHECK: [[PRED_STORE_IF21]]:
-; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
+; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[IV]], 11
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP52]]
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11
; CHECK-NEXT: store i8 [[TMP54]], ptr [[TMP53]], align 1
@@ -331,7 +331,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[TMP7]], i32 12
; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
; CHECK: [[PRED_STORE_IF23]]:
-; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
+; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[IV]], 12
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12
; CHECK-NEXT: store i8 [[TMP58]], ptr [[TMP57]], align 1
@@ -340,7 +340,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP7]], i32 13
; CHECK-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
; CHECK: [[PRED_STORE_IF25]]:
-; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
+; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[IV]], 13
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP60]]
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13
; CHECK-NEXT: store i8 [[TMP62]], ptr [[TMP61]], align 1
@@ -349,7 +349,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP7]], i32 14
; CHECK-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
; CHECK: [[PRED_STORE_IF27]]:
-; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
+; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[IV]], 14
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP64]]
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14
; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP65]], align 1
@@ -358,13 +358,13 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15
; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]]
; CHECK: [[PRED_STORE_IF29]]:
-; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
+; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[IV]], 15
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP68]]
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
; CHECK-NEXT: store i8 [[TMP70]], ptr [[TMP69]], align 1
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
; CHECK: [[PRED_STORE_CONTINUE30]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[IV]], 16
; CHECK-NEXT: [[TMP71:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP71]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index d8713bdda689a..827612cfe36d5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -1045,12 +1045,12 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]]
; TF-FIXEDLEN: [[VECTOR_BODY]]:
; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
-; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
-; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
-; TF-FIXEDLEN-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 1025)
+; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
+; TF-FIXEDLEN-NEXT: br i1 [[TMP0]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; TF-FIXEDLEN: [[PRED_STORE_IF]]:
-; TF-FIXEDLEN-NEXT: store i64 [[TMP0]], ptr [[B]], align 8
+; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; TF-FIXEDLEN-NEXT: store i64 [[TMP1]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE]]
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE]]:
; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
@@ -1074,7 +1074,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
; TF-FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B]], align 8
; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; TF-FIXEDLEN: [[PRED_STORE_CONTINUE6]]:
-; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TF-FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index a1c727f62ba7a..ceedb787054a0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -603,11 +603,10 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 12
-; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw i64 [[TMP6]], 2
@@ -630,6 +629,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP24:%.*]] = shl nsw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP24]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP25]], align 8
@@ -665,7 +665,8 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
; CHECK: pred.store.if14:
-; CHECK-NEXT: [[TMP39:%.*]] = shl nsw i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP88:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP39:%.*]] = shl nsw i64 [[TMP88]], 2
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP39]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP40]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE15]]
@@ -700,7 +701,8 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP21]], i32 0
; CHECK-NEXT: br i1 [[TMP53]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
; CHECK: pred.store.if22:
-; CHECK-NEXT: [[TMP54:%.*]] = shl nsw i64 [[TMP5]], 2
+; CHECK-NEXT: [[TMP107:%.*]] = add i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP54:%.*]] = shl nsw i64 [[TMP107]], 2
; CHECK-NEXT: [[TMP55:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP54]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP55]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE23]]
@@ -735,7 +737,8 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <4 x i1> [[TMP22]], i32 0
; CHECK-NEXT: br i1 [[TMP68]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
; CHECK: pred.store.if30:
-; CHECK-NEXT: [[TMP69:%.*]] = shl nsw i64 [[TMP6]], 2
+; CHECK-NEXT: [[TMP108:%.*]] = add i64 [[INDEX]], 12
+; CHECK-NEXT: [[TMP69:%.*]] = shl nsw i64 [[TMP108]], 2
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP69]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP70]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE31]]
@@ -785,8 +788,7 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 {
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ]
-; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0
-; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[TMP86]], 2
+; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[INDEX40]], 2
; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]]
; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8
; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x d...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/136021
More information about the llvm-commits
mailing list