[llvm] [VPlan] Process simplifyRecipes via a worklist (PR #133977)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 2 04:37:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
Currently simplifyRecipes traverses each instruction just once, in order, so if a simplification exposes a new possible combine, that combine is missed.
This moves it to an InstCombine-style worklist, which adds the new result and its users to the worklist whenever a simplification happens. It also processes instructions in reverse order, which should allow larger patterns to be matched first.
The transform is something of a mix of InstCombine and InstSimplify: some simplifications just return an existing value, while others create a new recipe, which must be inserted within simplifyRecipe itself before it is returned.
This is the first step to addressing the "Split up into simpler, modular combines" TODO.
I needed to split the VPBlendRecipe simplifications out into a separate transform. With instructions now processed in reverse order, blends were being normalised before the other simplifications had kicked in, but the choice of which mask to discard during normalisation depends on how many users each mask has. Running the blend transform separately ensures that the masks are simplified first, before that choice is made.
---
Full diff: https://github.com/llvm/llvm-project/pull/133977.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.h (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+122-96)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+3)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
index cc21870bee2e3..ac2a8d997d2e9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
@@ -63,6 +63,9 @@ class VPTypeAnalysis {
/// Return the LLVMContext used by the analysis.
LLVMContext &getContext() { return Ctx; }
+
+ /// Remove \p V from the cache. You must call this after a value is erased.
+ void erase(VPValue *V) { CachedTypes.erase(V); }
};
// Collect a VPlan's ephemeral recipes (those used only by an assume).
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9a041c83438dc..7fe01657248a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -923,85 +923,16 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+static VPValue *simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
using namespace llvm::VPlanPatternMatch;
- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
- // Try to remove redundant blend recipes.
- SmallPtrSet<VPValue *, 4> UniqueValues;
- if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
- UniqueValues.insert(Blend->getIncomingValue(0));
- for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
- if (!match(Blend->getMask(I), m_False()))
- UniqueValues.insert(Blend->getIncomingValue(I));
-
- if (UniqueValues.size() == 1) {
- Blend->replaceAllUsesWith(*UniqueValues.begin());
- Blend->eraseFromParent();
- return;
- }
-
- if (Blend->isNormalized())
- return;
-
- // Normalize the blend so its first incoming value is used as the initial
- // value with the others blended into it.
-
- unsigned StartIndex = 0;
- for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
- // If a value's mask is used only by the blend then is can be deadcoded.
- // TODO: Find the most expensive mask that can be deadcoded, or a mask
- // that's used by multiple blends where it can be removed from them all.
- VPValue *Mask = Blend->getMask(I);
- if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
- StartIndex = I;
- break;
- }
- }
-
- SmallVector<VPValue *, 4> OperandsWithMask;
- OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
-
- for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
- if (I == StartIndex)
- continue;
- OperandsWithMask.push_back(Blend->getIncomingValue(I));
- OperandsWithMask.push_back(Blend->getMask(I));
- }
-
- auto *NewBlend = new VPBlendRecipe(
- cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
- NewBlend->insertBefore(&R);
-
- VPValue *DeadMask = Blend->getMask(StartIndex);
- Blend->replaceAllUsesWith(NewBlend);
- Blend->eraseFromParent();
- recursivelyDeleteDeadRecipes(DeadMask);
-
- /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
- VPValue *NewMask;
- if (NewBlend->getNumOperands() == 3 &&
- match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
- VPValue *Inc0 = NewBlend->getOperand(0);
- VPValue *Inc1 = NewBlend->getOperand(1);
- VPValue *OldMask = NewBlend->getOperand(2);
- NewBlend->setOperand(0, Inc1);
- NewBlend->setOperand(1, Inc0);
- NewBlend->setOperand(2, NewMask);
- if (OldMask->getNumUsers() == 0)
- cast<VPInstruction>(OldMask)->eraseFromParent();
- }
- return;
- }
-
// VPScalarIVSteps can only be simplified after unrolling. VPScalarIVSteps for
// part 0 can be replaced by their start value, if only the first lane is
// demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
if (Steps->getParent()->getPlan()->isUnrolled() && Steps->isPart0() &&
vputils::onlyFirstLaneUsed(Steps)) {
- Steps->replaceAllUsesWith(Steps->getOperand(0));
- return;
+ return Steps->getOperand(0);
}
}
@@ -1011,11 +942,11 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Type *TruncTy = TypeInfo.inferScalarType(Trunc);
Type *ATy = TypeInfo.inferScalarType(A);
if (TruncTy == ATy) {
- Trunc->replaceAllUsesWith(A);
+ return A;
} else {
// Don't replace a scalarizing recipe with a widened cast.
if (isa<VPReplicateRecipe>(&R))
- return;
+ return nullptr;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
@@ -1028,25 +959,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPC->setUnderlyingValue(UnderlyingExt);
}
VPC->insertBefore(&R);
- Trunc->replaceAllUsesWith(VPC);
+ return VPC;
} else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
VPC->insertBefore(&R);
- Trunc->replaceAllUsesWith(VPC);
+ return VPC;
}
}
-#ifndef NDEBUG
- // Verify that the cached type info is for both A and its users is still
- // accurate by comparing it to freshly computed types.
- VPTypeAnalysis TypeInfo2(
- R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
- assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
- for (VPUser *U : A->users()) {
- auto *R = cast<VPRecipeBase>(U);
- for (VPValue *VPV : R->definedValues())
- assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
- }
-#endif
}
// Simplify (X && Y) || (X && !Y) -> X.
@@ -1056,17 +975,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
VPValue *X, *Y;
if (match(&R,
m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y)))))) {
- R.getVPSingleValue()->replaceAllUsesWith(X);
- R.eraseFromParent();
- return;
- }
+ m_LogicalAnd(m_Deferred(X), m_Not(m_Deferred(Y))))))
+ return X;
if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
+ return A;
if (match(&R, m_Not(m_Not(m_VPValue(A)))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
+ return A;
// Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
if ((match(&R,
@@ -1075,16 +991,125 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
TypeInfo.inferScalarType(R.getOperand(1)) ==
TypeInfo.inferScalarType(R.getVPSingleValue()))
- return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+ return R.getOperand(1);
+
+ return nullptr;
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
+ SetVector<VPRecipeBase *> Worklist;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ Worklist.insert(&R);
+
+ while (!Worklist.empty()) {
+ VPRecipeBase *R = Worklist.pop_back_val();
+ if (VPValue *Result = simplifyRecipe(*R, TypeInfo)) {
+ R->getVPSingleValue()->replaceAllUsesWith(Result);
+ TypeInfo.erase(R->getVPSingleValue());
+ R->eraseFromParent();
+ if (VPRecipeBase *ResultR = Result->getDefiningRecipe())
+ Worklist.insert(ResultR);
+ for (VPUser *U : Result->users())
+ if (auto *UR = dyn_cast<VPRecipeBase>(U))
+ if (UR != R)
+ Worklist.insert(UR);
+
+#ifndef NDEBUG
+ // Verify that the cached type info is for both Result and its users is
+ // still accurate by comparing it to freshly computed types.
+ VPTypeAnalysis TypeInfo2(&CanonicalIVTy);
+ assert(TypeInfo.inferScalarType(Result) ==
+ TypeInfo2.inferScalarType(Result));
+ for (VPUser *U : Result->users()) {
+ auto *R = cast<VPRecipeBase>(U);
+ for (VPValue *VPV : R->definedValues())
+ assert(TypeInfo.inferScalarType(VPV) ==
+ TypeInfo2.inferScalarType(VPV));
+ }
+#endif
+ }
+ }
+}
+
+void VPlanTransforms::simplifyBlends(VPlan &Plan) {
+ using namespace llvm::VPlanPatternMatch;
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ SetVector<VPRecipeBase *> Worklist;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
+ auto *Blend = dyn_cast<VPBlendRecipe>(&R);
+ if (!Blend)
+ continue;
+
+ // Try to remove redundant blend recipes.
+ SmallPtrSet<VPValue *, 4> UniqueValues;
+ if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
+ UniqueValues.insert(Blend->getIncomingValue(0));
+ for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+ if (!match(Blend->getMask(I), m_False()))
+ UniqueValues.insert(Blend->getIncomingValue(I));
+
+ if (UniqueValues.size() == 1) {
+ Blend->replaceAllUsesWith(*UniqueValues.begin());
+ Blend->eraseFromParent();
+ continue;
+ }
+
+ if (Blend->isNormalized())
+ continue;
+
+ // Normalize the blend so its first incoming value is used as the initial
+ // value with the others blended into it.
+
+ unsigned StartIndex = 0;
+ for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+ // If a value's mask is used only by the blend then is can be deadcoded.
+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
+ // that's used by multiple blends where it can be removed from them all.
+ VPValue *Mask = Blend->getMask(I);
+ if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+ StartIndex = I;
+ break;
+ }
+ }
+
+ SmallVector<VPValue *, 4> OperandsWithMask;
+ OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
+
+ for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+ if (I == StartIndex)
+ continue;
+ OperandsWithMask.push_back(Blend->getIncomingValue(I));
+ OperandsWithMask.push_back(Blend->getMask(I));
+ }
+
+ auto *NewBlend = new VPBlendRecipe(
+ cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+ NewBlend->insertBefore(&R);
+
+ VPValue *DeadMask = Blend->getMask(StartIndex);
+ Blend->replaceAllUsesWith(NewBlend);
+ Blend->eraseFromParent();
+ recursivelyDeleteDeadRecipes(DeadMask);
+
+ /// Simplify BLEND %a, %b, Not(%mask) -> BLEND %b, %a, %mask.
+ VPValue *NewMask;
+ if (NewBlend->getNumOperands() == 3 &&
+ match(NewBlend->getMask(1), m_Not(m_VPValue(NewMask)))) {
+ VPValue *Inc0 = NewBlend->getOperand(0);
+ VPValue *Inc1 = NewBlend->getOperand(1);
+ VPValue *OldMask = NewBlend->getOperand(2);
+ NewBlend->setOperand(0, Inc1);
+ NewBlend->setOperand(1, Inc0);
+ NewBlend->setOperand(2, NewMask);
+ if (OldMask->getNumUsers() == 0)
+ cast<VPInstruction>(OldMask)->eraseFromParent();
+ }
}
}
}
@@ -1684,6 +1709,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeRedundantInductionCasts, Plan);
runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+ runPass(simplifyBlends, Plan);
runPass(removeDeadRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index c23ff38265670..8fdb3f0025a05 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -183,6 +183,9 @@ struct VPlanTransforms {
/// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Normalize and simplify VPBlendRecipes.
+ static void simplifyBlends(VPlan &Plan);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
``````````
</details>
https://github.com/llvm/llvm-project/pull/133977
More information about the llvm-commits
mailing list