[llvm] [LV] Replace remaining LogicalAnd to vp.merge in EVL optimization. (PR #184068)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 15 19:25:20 PDT 2026
https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/184068
>From c049f252f2388c044991e9d52ef887ac553a2c98 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 15 Mar 2026 19:05:47 -0700
Subject: [PATCH] Replace all logicalAnd(HeaderMask, Mask) if possible.
---
.../lib/Transforms/Vectorize/VPlanTransforms.cpp | 16 ++++++++++++++++
.../LoopVectorize/RISCV/low-trip-count.ll | 6 ++----
.../RISCV/tail-folding-complex-mask.ll | 10 +++-------
3 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 33e5b25bd9322..bea3d68416635 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3145,6 +3145,22 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
OldRecipes.push_back(R);
}
}
+
+ // Replace each remaining LogicalAnd(HeaderMask, Mask) with a
+ // vp.merge(True, Mask, False, EVL).
+ for (VPUser *U : collectUsersRecursively(HeaderMask)) {
+ VPValue *Mask;
+ if (match(U, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask)))) {
+ auto *LogicalAnd = cast<VPInstruction>(U);
+ auto *Merge = new VPWidenIntrinsicRecipe(
+ Intrinsic::vp_merge, {Plan.getTrue(), Mask, Plan.getFalse(), EVL},
+ TypeInfo.inferScalarType(Mask), {}, {}, LogicalAnd->getDebugLoc());
+ Merge->insertBefore(LogicalAnd);
+ LogicalAnd->replaceAllUsesWith(Merge);
+ OldRecipes.push_back(LogicalAnd);
+ }
+ }
+
// Erase old recipes at the end so we don't invalidate TypeInfo.
for (VPRecipeBase *R : reverse(OldRecipes)) {
SmallVector<VPValue *> PossiblyDead(R->operands());
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index e8366e8dcadc6..13a743e09ece0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -291,11 +291,9 @@ define void @const_tc_with_predicated_store(i1 %c1, i1 %c2, i1 %c3, ptr %dst) #1
; CHECK-NEXT: [[TMP3:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i1> [[BROADCAST_SPLAT2]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <vscale x 4 x i32> [[TMP4]], splat (i32 57)
-; CHECK-NEXT: [[TMP6:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> [[TMP2]], <vscale x 4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP2]], <vscale x 4 x i1> zeroinitializer, i32 57)
; CHECK-NEXT: [[TMP7:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i1> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i1> zeroinitializer, i32 57)
; CHECK-NEXT: [[TMP9:%.*]] = or <vscale x 4 x i1> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i1> [[TMP9]], [[TMP10]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
index 1aa53e1ef95a0..db8c082e9a4b9 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
@@ -30,19 +30,15 @@ define void @test(i64 %n, ptr noalias %src0, ptr noalias %src1, ptr noalias %src
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
-; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0
-; IF-EVL-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
-; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT6]]
; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], i32 [[TMP7]])
+; IF-EVL-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP4]], <vscale x 4 x i1> zeroinitializer, i32 [[TMP7]])
; IF-EVL-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[VP_OP_LOAD]]
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC1]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> [[TMP4]], i32 [[TMP7]])
; IF-EVL-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[VP_OP_LOAD7]], [[PREDPHI]]
-; IF-EVL-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-NEXT: [[TMP14:%.*]] = or <vscale x 4 x i1> [[TMP4]], [[TMP6]]
-; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
+; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> zeroinitializer, i32 [[TMP7]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[TMP9]], [[TMP13]]
; IF-EVL-NEXT: [[PREDPHI8:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP12]]
; IF-EVL-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[BROADCAST_SPLAT4]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[EVL_BASED_IV]]
More information about the llvm-commits
mailing list