[llvm] [LV] Simplify recipes after convertToConcreteRecipes (PR #180235)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 6 09:14:08 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
Run simplifyRecipes once more, after convertToConcreteRecipes. Caveat: there is one minor regression, in that the `nuw` flag on one `shl` is now dropped.
---
Full diff: https://github.com/llvm/llvm-project/pull/180235.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll (+9-37)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index db2c84d78fb15..5376770d18b22 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7444,6 +7444,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::removeDeadRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan);
+ VPlanTransforms::simplifyRecipes(BestVPlan);
// Convert the exit condition to AVLNext == 0 for EVL tail folded loops.
VPlanTransforms::convertEVLExitCond(BestVPlan);
// Regions are dissolved after optimizing for VF and UF, which completely
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index f2c2c636f92b9..d7a214350a854 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -288,7 +288,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
; IC2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP6]], i64 0
; IC2-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IC2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
+; IC2-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 1
; IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP7]]
; IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; IC2-NEXT: [[TMP10:%.*]] = mul i64 [[N_VEC]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 2f78e72e106d6..a66d70dd0c2a8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -860,24 +860,10 @@ define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
; CHECK: pred.urem.if61:
; CHECK-NEXT: br label [[PRED_UREM_CONTINUE62]]
; CHECK: pred.urem.continue62:
-; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[CMP_I]], <32 x i32> zeroinitializer, <32 x i32> poison
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i32> [[PREDPHI]], i32 0
-; CHECK-NEXT: [[TMP34:%.*]] = ashr i32 [[CONV5_I]], [[TMP33]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT63:%.*]] = insertelement <32 x i32> poison, i32 [[TMP34]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT64:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT63]], <32 x i32> poison, <32 x i32> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP36:%.*]] = select <32 x i1> [[TMP60]], <32 x i1> poison, <32 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP37:%.*]] = or <32 x i1> [[TMP36]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <32 x i32> [[BROADCAST_SPLAT64]], zeroinitializer
-; CHECK-NEXT: [[TMP39:%.*]] = shl <32 x i32> [[PREDPHI]], splat (i32 24)
-; CHECK-NEXT: [[TMP40:%.*]] = ashr exact <32 x i32> [[TMP39]], splat (i32 24)
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP38]], i32 0
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], <32 x i32> [[TMP40]], <32 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI65:%.*]] = select <32 x i1> [[TMP37]], <32 x i32> [[TMP42]], <32 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[PREDPHI65]], i32 31
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
@@ -891,42 +877,28 @@ define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
; CHECK-NEXT: [[INDEX68:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT78:%.*]], [[PRED_UREM_CONTINUE76:%.*]] ]
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP44]], i32 0
; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_UREM_IF69:%.*]], label [[PRED_UREM_CONTINUE70:%.*]]
-; CHECK: pred.urem.if69:
+; CHECK: pred.urem.if66:
; CHECK-NEXT: br label [[PRED_UREM_CONTINUE70]]
-; CHECK: pred.urem.continue70:
+; CHECK: pred.urem.continue67:
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP44]], i32 1
; CHECK-NEXT: br i1 [[TMP46]], label [[PRED_UREM_IF71:%.*]], label [[PRED_UREM_CONTINUE72:%.*]]
-; CHECK: pred.urem.if71:
+; CHECK: pred.urem.if68:
; CHECK-NEXT: br label [[PRED_UREM_CONTINUE72]]
-; CHECK: pred.urem.continue72:
+; CHECK: pred.urem.continue69:
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP44]], i32 2
; CHECK-NEXT: br i1 [[TMP47]], label [[PRED_UREM_IF73:%.*]], label [[PRED_UREM_CONTINUE74:%.*]]
-; CHECK: pred.urem.if73:
+; CHECK: pred.urem.if70:
; CHECK-NEXT: br label [[PRED_UREM_CONTINUE74]]
-; CHECK: pred.urem.continue74:
+; CHECK: pred.urem.continue71:
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP44]], i32 3
; CHECK-NEXT: br i1 [[TMP48]], label [[PRED_UREM_IF75:%.*]], label [[PRED_UREM_CONTINUE76]]
-; CHECK: pred.urem.if75:
+; CHECK: pred.urem.if72:
; CHECK-NEXT: br label [[PRED_UREM_CONTINUE76]]
-; CHECK: pred.urem.continue76:
-; CHECK-NEXT: [[PREDPHI77:%.*]] = select i1 [[CMP_I]], <4 x i32> zeroinitializer, <4 x i32> poison
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[PREDPHI77]], i32 0
-; CHECK-NEXT: [[TMP50:%.*]] = ashr i32 [[CONV5_I]], [[TMP49]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT79:%.*]] = insertelement <4 x i32> poison, i32 [[TMP50]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT80:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT79]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK: pred.urem.continue73:
; CHECK-NEXT: [[INDEX_NEXT78]] = add nuw i32 [[INDEX68]], 4
; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT78]], 100
; CHECK-NEXT: br i1 [[TMP51]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP53:%.*]] = or <4 x i1> [[TMP52]], [[BROADCAST_SPLAT67]]
-; CHECK-NEXT: [[TMP54:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT80]], zeroinitializer
-; CHECK-NEXT: [[TMP55:%.*]] = shl <4 x i32> [[PREDPHI77]], splat (i32 24)
-; CHECK-NEXT: [[TMP56:%.*]] = ashr exact <4 x i32> [[TMP55]], splat (i32 24)
-; CHECK-NEXT: [[TMP57:%.*]] = extractelement <4 x i1> [[TMP54]], i32 0
-; CHECK-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], <4 x i32> [[TMP56]], <4 x i32> zeroinitializer
-; CHECK-NEXT: [[PREDPHI81:%.*]] = select <4 x i1> [[TMP53]], <4 x i32> [[TMP58]], <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i32> [[PREDPHI81]], i32 3
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 4, [[VEC_EPILOG_ITER_CHECK]] ], [ 100, [[ITER_CHECK:%.*]] ]
@@ -952,7 +924,7 @@ define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: exit:
-; CHECK-NEXT: [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ [[TMP59]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[P_2_LCSSA]]
;
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/180235
More information about the llvm-commits
mailing list