[llvm] [VPlan] Process simplifyRecipes via a worklist (PR #133977)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 2 05:09:49 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/133977
>From 43e9f29def38a9309354cb143138c5e1f364333d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 1 Apr 2025 19:05:46 +0100
Subject: [PATCH 1/3] [VPlan] Process simplifyRecipes as a worklist. NFCI
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 191 ++++++++++--------
.../Transforms/Vectorize/VPlanTransforms.h | 3 +
2 files changed, 110 insertions(+), 84 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9a041c83438dc..dc57144976d95 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -923,85 +923,16 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
using namespace llvm::VPlanPatternMatch;
- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
- // Try to remove redundant blend recipes.
- SmallPtrSet<VPValue *, 4> UniqueValues;
- if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
- UniqueValues.insert(Blend->getIncomingValue(0));
- for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
- if (!match(Blend->getMask(I), m_False()))
- UniqueValues.insert(Blend->getIncomingValue(I));
-
- if (UniqueValues.size() == 1) {
- Blend->replaceAllUsesWith(*UniqueValues.begin());
- Blend->eraseFromParent();
- return;
- }
-
- if (Blend->isNormalized())
- return;
-
- // Normalize the blend so its first incoming value is used as the initial
- // value with the others blended into it.
-
- unsigned StartIndex = 0;
- for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
- // If a value's mask is used only by the blend then is can be deadcoded.
- // TODO: Find the most expensive mask that can be deadcoded, or a mask
- // that's used by multiple blends where it can be removed from them all.
- VPValue *Mask = Blend->getMask(I);
- if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
- StartIndex = I;
- break;
- }
- }
-
- SmallVector<VPValue *, 4> OperandsWithMask;
- OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
-
- for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
- if (I == StartIndex)
- continue;
- OperandsWithMask.push_back(Blend->getIncomingValue(I));
- OperandsWithMask.push_back(Blend->getMask(I));
- }
-
- auto *NewBlend = new VPBlendRecipe(
- cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
- NewBlend->insertBefore(&R);
-
- VPValue *DeadMask = Blend->getMask(StartIndex);
- Blend->replaceAllUsesWith(NewBlend);
- Blend->eraseFromParent();
- recursivelyDeleteDeadRecipes(DeadMask);
-
- /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
- VPValue *NewMask;
- if (NewBlend->getNumOperands() == 3 &&
- match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
- VPValue *Inc0 = NewBlend->getOperand(0);
- VPValue *Inc1 = NewBlend->getOperand(1);
- VPValue *OldMask = NewBlend->getOperand(2);
- NewBlend->setOperand(0, Inc1);
- NewBlend->setOperand(1, Inc0);
- NewBlend->setOperand(2, NewMask);
- if (OldMask->getNumUsers() == 0)
- cast<VPInstruction>(OldMask)->eraseFromParent();
- }
- return;
- }
-
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
// part 0 can be replaced by their start value, if only the first lane is
// demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() &&
vputils::onlyFirstLaneUsed(Steps)) {
- Steps->replaceAllUsesWith(Steps->getOperand(0));
- return;
+ return Steps->getOperand(0);
}
}
@@ -1011,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
Type *ATy = TypeInfo.inferScalarType(A);
if (TruncTy == ATy) {
- Trunc->replaceAllUsesWith(A);
+ return A;
} else {
// Don't replace a scalarizing recipe with a widened cast.
if (isa<VPReplicateRecipe>(&R))
- return;
+ return nullptr;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
@@ -1028,11 +959,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPC->setUnderlyingValue(UnderlyingExt);
}
VPC->insertBefore(&R);
- Trunc->replaceAllUsesWith(VPC);
+ return VPC;
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
VPC->insertBefore(&R);
- Trunc->replaceAllUsesWith(VPC);
+ return VPC;
}
}
#ifndef NDEBUG
@@ -1056,17 +987,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPValue *X, *Y;
if (match(&R,
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) {
- R.getVPSingleValue()->replaceAllUsesWith(X);
- R.eraseFromParent();
- return;
- }
+ m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y))))))
+ return X;
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
+ return A;
if (match(&R, m_Not(m_Not(m_VPValue(A)))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
+ return A;
// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(&R,
@@ -1075,16 +1003,110 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
TypeInfo.inferScalarType(R.getOperand(1)) ==
TypeInfo.inferScalarType(R.getVPSingleValue()))
- return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+ return R.getOperand(1);
+
+ return nullptr;
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
+ SetVector<VPRecipeBase *> Worklist;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ Worklist.insert(&R);
+
+ while (!Worklist.empty()) {
+ VPRecipeBase *R = Worklist.pop_back_val();
+ if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) {
+ R->getVPSingleValue()->replaceAllUsesWith(Result);
+ R->eraseFromParent();
+ if (VPRecipeBase *ResultR = Result->getDefiningRecipe())
+ Worklist.insert(ResultR);
+ for (VPUser *U : Result->users())
+ if (auto *UR = dyn_cast<VPRecipeBase>(U))
+ if (UR != R)
+ Worklist.insert(UR);
+ }
+ }
+}
+
+void VPlanTransforms::simplifyBlends(VPlan &Plan) {
+ using namespace llvm::VPlanPatternMatch;
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ SetVector<VPRecipeBase *> Worklist;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
+ auto *Blend = dyn_cast<VPBlendRecipe>(&R);
+ if (!Blend)
+ continue;
+
+ // Try to remove redundant blend recipes.
+ SmallPtrSet<VPValue *, 4> UniqueValues;
+ if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
+ UniqueValues.insert(Blend->getIncomingValue(0));
+ for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+ if (!match(Blend->getMask(I), m_False()))
+ UniqueValues.insert(Blend->getIncomingValue(I));
+
+ if (UniqueValues.size() == 1) {
+ Blend->replaceAllUsesWith(*UniqueValues.begin());
+ Blend->eraseFromParent();
+ continue;
+ }
+
+ if (Blend->isNormalized())
+ continue;
+
+ // Normalize the blend so its first incoming value is used as the initial
+ // value with the others blended into it.
+
+ unsigned StartIndex = 0;
+ for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+ // If a value's mask is used only by the blend then it can be deadcoded.
+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
+ // that's used by multiple blends where it can be removed from them all.
+ VPValue *Mask = Blend->getMask(I);
+ if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+ StartIndex = I;
+ break;
+ }
+ }
+
+ SmallVector<VPValue *, 4> OperandsWithMask;
+ OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
+
+ for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+ if (I == StartIndex)
+ continue;
+ OperandsWithMask.push_back(Blend->getIncomingValue(I));
+ OperandsWithMask.push_back(Blend->getMask(I));
+ }
+
+ auto *NewBlend = new VPBlendRecipe(
+ cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+ NewBlend->insertBefore(&R);
+
+ VPValue *DeadMask = Blend->getMask(StartIndex);
+ Blend->replaceAllUsesWith(NewBlend);
+ Blend->eraseFromParent();
+ recursivelyDeleteDeadRecipes(DeadMask);
+
+ /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
+ VPValue *NewMask;
+ if (NewBlend->getNumOperands() == 3 &&
+ match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
+ VPValue *Inc0 = NewBlend->getOperand(0);
+ VPValue *Inc1 = NewBlend->getOperand(1);
+ VPValue *OldMask = NewBlend->getOperand(2);
+ NewBlend->setOperand(0, Inc1);
+ NewBlend->setOperand(1, Inc0);
+ NewBlend->setOperand(2, NewMask);
+ if (OldMask->getNumUsers() == 0)
+ cast<VPInstruction>(OldMask)->eraseFromParent();
+ }
}
}
}
@@ -1684,6 +1706,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeRedundantInductionCasts, Plan);
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+ runPass(simplifyBlends, Plan);
runPass(removeDeadRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c23ff38265670..8fdb3f0025a05 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -183,6 +183,9 @@ struct VPlanTransforms {
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Normalize and simplify VPBlendRecipes.
+ static void simplifyBlends(VPlan &Plan);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
>From 4e05ab9c58cae87981a2ad07aaaf322ec5933a8e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 2 Apr 2025 12:36:02 +0100
Subject: [PATCH 2/3] Remove erased type from VPTypeAnalysis cache, move
assertion into all simplifications
---
llvm/lib/Transforms/Vectorize/VPlanAnalysis.h | 3 +++
.../Transforms/Vectorize/VPlanTransforms.cpp | 27 ++++++++++---------
2 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index cc21870bee2e3..ac2a8d997d2e9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -63,6 +63,9 @@ class VPTypeAnalysis {
/// Return the LLVMContext used by the analysis.
LLVMContext &getContext() { return Ctx; }
+
+ /// Remove \p V from the cache. You must call this after a value is erased.
+ void erase(VPValue *V) { CachedTypes.erase(V); }
};
// Collect a VPlan's ephemeral recipes (those used only by an assume).
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index dc57144976d95..7fe01657248a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -966,18 +966,6 @@ static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return VPC;
}
}
-#ifndef NDEBUG
- // Verify that the cached type info is for both A and its users is still
- // accurate by comparing it to freshly computed types.
- VPTypeAnalysis TypeInfo2(
- R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
- assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
- for (VPUser *U : A->users()) {
- auto *R = cast<VPRecipeBase>(U);
- for (VPValue *VPV : R->definedValues())
- assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
- }
-#endif
}
// Simplify (X && Y) || (X && !Y) -> X.
@@ -1021,6 +1009,7 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
VPRecipeBase *R = Worklist.pop_back_val();
if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) {
R->getVPSingleValue()->replaceAllUsesWith(Result);
+ TypeInfo.erase(R->getVPSingleValue());
R->eraseFromParent();
if (VPRecipeBase *ResultR = Result->getDefiningRecipe())
Worklist.insert(ResultR);
@@ -1028,6 +1017,20 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
if (auto *UR = dyn_cast<VPRecipeBase>(U))
if (UR != R)
Worklist.insert(UR);
+
+#ifndef NDEBUG
+ // Verify that the cached type info for both Result and its users is
+ // still accurate by comparing it to freshly computed types.
+ VPTypeAnalysis TypeInfo2(&CanonicalIVTy);
+ assert(TypeInfo.inferScalarType(Result) ==
+ TypeInfo2.inferScalarType(Result));
+ for (VPUser *U : Result->users()) {
+ auto *R = cast<VPRecipeBase>(U);
+ for (VPValue *VPV : R->definedValues())
+ assert(TypeInfo.inferScalarType(VPV) ==
+ TypeInfo2.inferScalarType(VPV));
+ }
+#endif
}
}
}
>From 3ca3643c14a895bf94879ebfe9e85f942b7242ea Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 2 Apr 2025 13:09:05 +0100
Subject: [PATCH 3/3] Add instructions in reverse order, so they're processed in
order. This matches InstCombine
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7fe01657248a3..8771e40370a73 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1002,7 +1002,7 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
SetVector<VPRecipeBase *> Worklist;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
- for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ for (VPRecipeBase &R : reverse(*VPBB))
Worklist.insert(&R);
while (!Worklist.empty()) {
More information about the llvm-commits
mailing list