[llvm] [VPlan] Simplify branch-cond with getVectorTripCount (PR #155604)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 02:16:57 PST 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/155604
>From 5144fd8d65629603e70adc38ed567c367c7b7fbf Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 27 Aug 2025 12:53:04 +0100
Subject: [PATCH 1/3] [LV] Simplify branch-cond with getVectorTripCount
In simplifyBranchConditionForVFAndUF, query the SCEV of getVectorTripCount
first, and fall back to getTripCount only when that SCEV cannot be computed,
so that previously missed cases are simplified. While at it, strip the dead check for a zero TC.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 13 ++++----
.../LoopVectorize/RISCV/low-trip-count.ll | 3 +-
.../X86/limit-vf-by-tripcount.ll | 33 ++++++++-----------
.../LoopVectorize/X86/load-deref-pred.ll | 3 +-
.../vector-loop-backedge-elimination.ll | 32 +++++++-----------
5 files changed, 34 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 82bf79e7bd876..fc3216f4e0486 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1747,14 +1747,15 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
// Try to simplify the branch condition if TC <= VF * UF when the latch
// terminator is BranchOnCount or BranchOnCond where the input is
// Not(ActiveLaneMask).
- const SCEV *TripCount =
- vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
- assert(!isa<SCEVCouldNotCompute>(TripCount) &&
+ const SCEV *VectorTripCount =
+ vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
+ if (isa<SCEVCouldNotCompute>(VectorTripCount))
+ VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
+ assert(!isa<SCEVCouldNotCompute>(VectorTripCount) &&
"Trip count SCEV must be computable");
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
- const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
- if (TripCount->isZero() ||
- !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
+ const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements);
+ if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, VectorTripCount, C))
return false;
} else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) {
// For BranchOnCond, check if we can prove the condition to be true using VF
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index 8ef53cade01ac..345f6f632158a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -295,8 +295,7 @@ define i8 @mul_non_pow_2_low_trip_count(ptr noalias %a) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1]] = mul <8 x i8> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
-; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> [[TMP1]])
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
index c1272e56836f8..6e3b2a5390948 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll
@@ -12,27 +12,22 @@ define void @test_tc_17_no_epilogue_vectorization(ptr noalias %src, ptr noalias
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 64
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 64
+; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3:%.*]], align 64
+; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ 16, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I]]
+; CHECK-NEXT: [[LDADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I]]
; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[LDADDR]], align 64
-; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I]]
+; CHECK-NEXT: [[STADDR:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[I]]
; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 17
-; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -69,11 +64,11 @@ define void @test_tc_18(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 64
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -140,7 +135,7 @@ define void @test_tc_19(ptr noalias %src, ptr noalias %dst) {
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF4]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -219,7 +214,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -231,7 +226,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD5]], ptr [[TMP15]], align 64
; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], 20
-; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
@@ -245,7 +240,7 @@ define void @test_tc_20(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: store i8 [[VAL]], ptr [[STADDR]], align 64
; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT: [[IS_NEXT:%.*]] = icmp ult i64 [[I_NEXT]], 20
-; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[IS_NEXT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -281,7 +276,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src,
; CHECK-NEXT: store <8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
@@ -294,7 +289,7 @@ define void @limit_main_loop_vf_to_avoid_dead_main_vector_loop(ptr noalias %src,
; CHECK-NEXT: store i8 [[L]], ptr [[GEP_DST]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 32
-; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 8771dc9a20379..6605338771c47 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -2581,8 +2581,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
; CHECK-NEXT: [[TMP114]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]]
; CHECK-NEXT: [[TMP115]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP116:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP116]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP113]], [[TMP112]]
; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index e160a15ece47d..bba459f776050 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -1140,18 +1140,14 @@ define void @test_vector_tc_eq_16(ptr %A) {
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
-; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[A]], align 1
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
-; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF8UF2: [[MIDDLE_BLOCK]]:
; VF8UF2-NEXT: br label %[[SCALAR_PH:.*]]
; VF8UF2: [[SCALAR_PH]]:
@@ -1165,7 +1161,7 @@ define void @test_vector_tc_eq_16(ptr %A) {
; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
-; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF8UF2: [[EXIT]]:
; VF8UF2-NEXT: ret void
;
@@ -1177,14 +1173,10 @@ define void @test_vector_tc_eq_16(ptr %A) {
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
; VF16UF1: [[VECTOR_BODY]]:
-; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[A]], align 1
; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
-; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
-; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[A]], align 1
+; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; VF16UF1: [[MIDDLE_BLOCK]]:
; VF16UF1-NEXT: br label %[[SCALAR_PH:.*]]
; VF16UF1: [[SCALAR_PH]]:
@@ -1198,7 +1190,7 @@ define void @test_vector_tc_eq_16(ptr %A) {
; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
-; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF16UF1: [[EXIT]]:
; VF16UF1-NEXT: ret void
;
@@ -1232,12 +1224,10 @@ exit:
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
-; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
-; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.
>From f38d9d79f364a91f426211b745d54ddf50ed274e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Mon, 3 Nov 2025 15:09:16 +0000
Subject: [PATCH 2/3] [VPlan] Update comment
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fc3216f4e0486..9684b5e980f20 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1744,9 +1744,8 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
if (match(Term, m_BranchOnCount()) ||
match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
m_VPValue(), m_VPValue(), m_VPValue()))))) {
- // Try to simplify the branch condition if TC <= VF * UF when the latch
- // terminator is BranchOnCount or BranchOnCond where the input is
- // Not(ActiveLaneMask).
+ // Try to simplify the branch condition if VectorTC <= VF * UF when the
+ // latch terminator is BranchOnCount or BranchOnCond(Not(ActiveLaneMask)).
const SCEV *VectorTripCount =
vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE);
if (isa<SCEVCouldNotCompute>(VectorTripCount))
>From d6f6e4e8a73d8f6d96cdb23977491959c566d27f Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 6 Nov 2025 10:13:43 +0000
Subject: [PATCH 3/3] [LV] Test update after rebase
---
.../LoopVectorize/AArch64/partial-reduce-dot-product.ll | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index d77ca9875bf01..37eac89acfd11 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -1589,8 +1589,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw nsw <16 x i64> [[TMP8]], [[TMP9]]
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v16i64(<2 x i64> [[VEC_PHI1]], <16 x i64> [[TMP10]])
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
-; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-INTERLEAVED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
More information about the llvm-commits mailing list