[llvm] [VPlan] Move findCommonEdgeMask optimization to simplifyBlends (PR #156304)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 17:53:11 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/156304
>From e235fd2cafa2cbe48a43b1a31c060ebbd89236c5 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 1 Sep 2025 18:20:38 +0800
Subject: [PATCH 1/3] [VPlan] Move findCommonEdgeMask optimization to
simplifyBlends
Following up from #150368, this moves folding common edge masks into simplifyBlends.
One test in uniform-blend.ll ended up regressing, but after looking at it closely, the regression came from a weird (x && !x) edge mask. So I've just included a simplification in this PR to fold that to false.
This is an alternative to #150369.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++
.../Transforms/Vectorize/VPlanPredicator.cpp | 28 -------------------
.../Transforms/Vectorize/VPlanTransforms.cpp | 26 +++++++++++++++++
.../Transforms/LoopVectorize/uniform-blend.ll | 5 ++--
4 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index eeeab22f0195b..fc289d428eabd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2413,6 +2413,12 @@ class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
}
+ /// Set mask number \p Idx to \p V.
+ void setMask(unsigned Idx, VPValue *V) {
+ assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
+ Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
+ }
+
void execute(VPTransformState &State) override {
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 2e9a36adbbf3c..0c27d535b680e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -67,10 +67,6 @@ class VPPredicator {
return EdgeMaskCache[{Src, Dst}] = Mask;
}
- /// Given a phi \p PhiR, try to see if its incoming blocks all share a common
- /// edge and return its mask.
- VPValue *findCommonEdgeMask(const VPPhi *PhiR) const;
-
public:
/// Returns the precomputed predicate of the edge from \p Src to \p Dst.
VPValue *getEdgeMask(const VPBasicBlock *Src, const VPBasicBlock *Dst) const {
@@ -232,21 +228,6 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
setEdgeMask(Src, DefaultDst, DefaultMask);
}
-VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const {
- VPValue *EdgeMask = getEdgeMask(PhiR->getIncomingBlock(0), PhiR->getParent());
- VPValue *CommonEdgeMask;
- if (!EdgeMask ||
- !match(EdgeMask, m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue())))
- return nullptr;
- for (const VPBasicBlock *InVPBB : drop_begin(PhiR->incoming_blocks())) {
- EdgeMask = getEdgeMask(InVPBB, PhiR->getParent());
- assert(EdgeMask && "Both null and non-null edge masks found");
- if (!match(EdgeMask, m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue())))
- return nullptr;
- }
- return CommonEdgeMask;
-}
-
void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
SmallVector<VPPhi *> Phis;
for (VPRecipeBase &R : VPBB->phis())
@@ -258,7 +239,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
// be duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR);
SmallVector<VPValue *, 2> OperandsWithMask;
for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
OperandsWithMask.push_back(InVPV);
@@ -269,14 +249,6 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
break;
}
- // If all incoming blocks share a common edge, remove it from the mask.
- if (CommonEdgeMask) {
- VPValue *X;
- if (match(EdgeMask,
- m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue(X))))
- EdgeMask = X;
- }
-
OperandsWithMask.push_back(EdgeMask);
}
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 17b54f2ef9c05..989ab37e34c92 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1092,6 +1092,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ // x && !x -> 0
+ if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X))))) {
+ Def->replaceAllUsesWith(Plan->getOrAddLiveIn(
+ ConstantInt::getFalse(VPTypeAnalysis(*Plan).inferScalarType(Def))));
+ return;
+ }
+
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return Def->replaceAllUsesWith(X);
@@ -1293,6 +1300,23 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
}
}
+/// Try to see if all of \p Blend's masks share a common value logically and'ed
+/// and remove it from the masks.
+static void removeCommonBlendMask(VPBlendRecipe *Blend) {
+ if (Blend->isNormalized())
+ return;
+ VPValue *CommonEdgeMask;
+ if (!match(Blend->getMask(0),
+ m_LogicalAnd(m_VPValue(CommonEdgeMask), m_VPValue())))
+ return;
+ for (unsigned I = 0; I < Blend->getNumIncomingValues(); I++)
+ if (!match(Blend->getMask(I),
+ m_LogicalAnd(m_Specific(CommonEdgeMask), m_VPValue())))
+ return;
+ for (unsigned I = 0; I < Blend->getNumIncomingValues(); I++)
+ Blend->setMask(I, Blend->getMask(I)->getDefiningRecipe()->getOperand(1));
+}
+
/// Normalize and simplify VPBlendRecipes. Should be run after simplifyRecipes
/// to make sure the masks are simplified.
static void simplifyBlends(VPlan &Plan) {
@@ -1303,6 +1327,8 @@ static void simplifyBlends(VPlan &Plan) {
if (!Blend)
continue;
+ removeCommonBlendMask(Blend);
+
// Try to remove redundant blend recipes.
SmallPtrSet<VPValue *, 4> UniqueValues;
if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index 306bdc0030154..8c7624e570cf5 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -130,8 +130,7 @@ define void @blend_chain_iv(i1 %c) {
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[PREDPHI:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI]], <4 x i64> undef
+; CHECK-NEXT: [[PREDPHI1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI1]], <4 x i64> undef
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]]
@@ -146,7 +145,7 @@ define void @blend_chain_iv(i1 %c) {
; CHECK-NEXT: store i16 0, ptr [[TMP6]], align 2
; CHECK-NEXT: store i16 0, ptr [[TMP8]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI]], splat (i64 4)
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[PREDPHI1]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
>From 2a46b5fc51485d692b985f1ab2cb635f0c1ee5c9 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 4 Sep 2025 23:43:45 +0800
Subject: [PATCH 2/3] Fix return style to be consistent with rest of function
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 54f791ea16cf7..7de94717f56e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1109,11 +1109,9 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
// x && !x -> 0
- if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X))))) {
- Def->replaceAllUsesWith(Plan->getOrAddLiveIn(
+ if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
+ return Def->replaceAllUsesWith(Plan->getOrAddLiveIn(
ConstantInt::getFalse(VPTypeAnalysis(*Plan).inferScalarType(Def))));
- return;
- }
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
return Def->replaceAllUsesWith(X);
>From 26eb86dd3049ee54a2735bf91ec7cfd1deb1cf68 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 5 Sep 2025 08:52:46 +0800
Subject: [PATCH 3/3] Fix test
---
...r87378-vpinstruction-or-drop-poison-generating-flags.ll | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
index 8b212f4ef9706..b330b6cd82c0a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -26,13 +26,9 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1001, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[TMP25]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP25]] to i64
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 8 x i32> @llvm.stepvector.nxv8i32()
-; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <vscale x 8 x i32> [[TMP10]], [[BROADCAST_SPLAT8]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP9:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP14]], <vscale x 8 x i1> zeroinitializer
@@ -42,9 +38,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64
; CHECK-NEXT: [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], splat (i1 true)
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 8 x i1> [[TMP11]], <vscale x 8 x i1> [[TMP21]], <vscale x 8 x i1> zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i1> [[TMP21]], i32 0
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]]
; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr align 2 [[TMP24]], <vscale x 8 x i1> [[TMP22]], i32 [[TMP25]])
More information about the llvm-commits
mailing list