[llvm] ddef380 - [VPlan] Move simplifyRecipe(s) definitions up to allow re-use (NFC)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 31 05:24:09 PST 2024
Author: Florian Hahn
Date: 2024-12-31T13:23:19Z
New Revision: ddef380cd6c30668cc6f6d952b4c045f724f8d57
URL: https://github.com/llvm/llvm-project/commit/ddef380cd6c30668cc6f6d952b4c045f724f8d57
DIFF: https://github.com/llvm/llvm-project/commit/ddef380cd6c30668cc6f6d952b4c045f724f8d57.diff
LOG: [VPlan] Move simplifyRecipe(s) definitions up to allow re-use (NFC)
Move definitions to allow easy reuse in
https://github.com/llvm/llvm-project/pull/108378.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1f5acf996a7720..89aab71905a293 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -662,6 +662,151 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
}
}
+/// Try to simplify recipe \p R.
+static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+ using namespace llvm::VPlanPatternMatch;
+
+ if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+ // Try to remove redundant blend recipes.
+ SmallPtrSet<VPValue *, 4> UniqueValues;
+ if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
+ UniqueValues.insert(Blend->getIncomingValue(0));
+ for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+ if (!match(Blend->getMask(I), m_False()))
+ UniqueValues.insert(Blend->getIncomingValue(I));
+
+ if (UniqueValues.size() == 1) {
+ Blend->replaceAllUsesWith(*UniqueValues.begin());
+ Blend->eraseFromParent();
+ return;
+ }
+
+ if (Blend->isNormalized())
+ return;
+
+ // Normalize the blend so its first incoming value is used as the initial
+ // value with the others blended into it.
+
+ unsigned StartIndex = 0;
+ for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+ // If a value's mask is used only by the blend then is can be deadcoded.
+ // TODO: Find the most expensive mask that can be deadcoded, or a mask
+ // that's used by multiple blends where it can be removed from them all.
+ VPValue *Mask = Blend->getMask(I);
+ if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+ StartIndex = I;
+ break;
+ }
+ }
+
+ SmallVector<VPValue *, 4> OperandsWithMask;
+ OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
+
+ for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
+ if (I == StartIndex)
+ continue;
+ OperandsWithMask.push_back(Blend->getIncomingValue(I));
+ OperandsWithMask.push_back(Blend->getMask(I));
+ }
+
+ auto *NewBlend = new VPBlendRecipe(
+ cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
+ NewBlend->insertBefore(&R);
+
+ VPValue *DeadMask = Blend->getMask(StartIndex);
+ Blend->replaceAllUsesWith(NewBlend);
+ Blend->eraseFromParent();
+ recursivelyDeleteDeadRecipes(DeadMask);
+ return;
+ }
+
+ VPValue *A;
+ if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
+ VPValue *Trunc = R.getVPSingleValue();
+ Type *TruncTy = TypeInfo.inferScalarType(Trunc);
+ Type *ATy = TypeInfo.inferScalarType(A);
+ if (TruncTy == ATy) {
+ Trunc->replaceAllUsesWith(A);
+ } else {
+ // Don't replace a scalarizing recipe with a widened cast.
+ if (isa<VPReplicateRecipe>(&R))
+ return;
+ if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
+
+ unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+ ? Instruction::SExt
+ : Instruction::ZExt;
+ auto *VPC =
+ new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
+ if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+ // UnderlyingExt has distinct return type, used to retain legacy cost.
+ VPC->setUnderlyingValue(UnderlyingExt);
+ }
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
+ auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
+ VPC->insertBefore(&R);
+ Trunc->replaceAllUsesWith(VPC);
+ }
+ }
+#ifndef NDEBUG
+ // Verify that the cached type info is for both A and its users is still
+ // accurate by comparing it to freshly computed types.
+ VPTypeAnalysis TypeInfo2(
+ R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
+ assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
+ for (VPUser *U : A->users()) {
+ auto *R = cast<VPRecipeBase>(U);
+ for (VPValue *VPV : R->definedValues())
+ assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
+ }
+#endif
+ }
+
+ // Simplify (X && Y) || (X && !Y) -> X.
+ // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
+ // && (Y || Z) and (X || !X) into true. This requires queuing newly created
+ // recipes to be visited during simplification.
+ VPValue *X, *Y, *X1, *Y1;
+ if (match(&R,
+ m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
+ m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
+ X == X1 && Y == Y1) {
+ R.getVPSingleValue()->replaceAllUsesWith(X);
+ R.eraseFromParent();
+ return;
+ }
+
+ if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
+ return R.getVPSingleValue()->replaceAllUsesWith(A);
+
+ if (match(&R, m_Not(m_Not(m_VPValue(A)))))
+ return R.getVPSingleValue()->replaceAllUsesWith(A);
+
+ // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
+ if ((match(&R,
+ m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
+ match(&R,
+ m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
+ TypeInfo.inferScalarType(R.getOperand(1)) ==
+ TypeInfo.inferScalarType(R.getVPSingleValue()))
+ return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+}
+
+/// Try to simplify the recipes in \p Plan
+static void simplifyRecipes(VPlan &Plan) {
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+ VPTypeAnalysis TypeInfo(CanonicalIVType);
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ simplifyRecipe(R, TypeInfo);
+ }
+ }
+}
+
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
unsigned BestUF,
PredicatedScalarEvolution &PSE) {
@@ -942,138 +1087,6 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
- using namespace llvm::VPlanPatternMatch;
-
- if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
- // Try to remove redundant blend recipes.
- SmallPtrSet<VPValue *, 4> UniqueValues;
- if (Blend->isNormalized() || !match(Blend->getMask(0), m_False()))
- UniqueValues.insert(Blend->getIncomingValue(0));
- for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
- if (!match(Blend->getMask(I), m_False()))
- UniqueValues.insert(Blend->getIncomingValue(I));
-
- if (UniqueValues.size() == 1) {
- Blend->replaceAllUsesWith(*UniqueValues.begin());
- Blend->eraseFromParent();
- return;
- }
-
- if (Blend->isNormalized())
- return;
-
- // Normalize the blend so its first incoming value is used as the initial
- // value with the others blended into it.
-
- unsigned StartIndex = 0;
- for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
- // If a value's mask is used only by the blend then is can be deadcoded.
- // TODO: Find the most expensive mask that can be deadcoded, or a mask
- // that's used by multiple blends where it can be removed from them all.
- VPValue *Mask = Blend->getMask(I);
- if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
- StartIndex = I;
- break;
- }
- }
-
- SmallVector<VPValue *, 4> OperandsWithMask;
- OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
-
- for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {
- if (I == StartIndex)
- continue;
- OperandsWithMask.push_back(Blend->getIncomingValue(I));
- OperandsWithMask.push_back(Blend->getMask(I));
- }
-
- auto *NewBlend = new VPBlendRecipe(
- cast<PHINode>(Blend->getUnderlyingValue()), OperandsWithMask);
- NewBlend->insertBefore(&R);
-
- VPValue *DeadMask = Blend->getMask(StartIndex);
- Blend->replaceAllUsesWith(NewBlend);
- Blend->eraseFromParent();
- recursivelyDeleteDeadRecipes(DeadMask);
- return;
- }
-
- VPValue *A;
- if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
- VPValue *Trunc = R.getVPSingleValue();
- Type *TruncTy = TypeInfo.inferScalarType(Trunc);
- Type *ATy = TypeInfo.inferScalarType(A);
- if (TruncTy == ATy) {
- Trunc->replaceAllUsesWith(A);
- } else {
- // Don't replace a scalarizing recipe with a widened cast.
- if (isa<VPReplicateRecipe>(&R))
- return;
- if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
-
- unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
- ? Instruction::SExt
- : Instruction::ZExt;
- auto *VPC =
- new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
- // UnderlyingExt has distinct return type, used to retain legacy cost.
- VPC->setUnderlyingValue(UnderlyingExt);
- }
- VPC->insertBefore(&R);
- Trunc->replaceAllUsesWith(VPC);
- } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
- auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
- VPC->insertBefore(&R);
- Trunc->replaceAllUsesWith(VPC);
- }
- }
-#ifndef NDEBUG
- // Verify that the cached type info is for both A and its users is still
- // accurate by comparing it to freshly computed types.
- VPTypeAnalysis TypeInfo2(
- R.getParent()->getPlan()->getCanonicalIV()->getScalarType());
- assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
- for (VPUser *U : A->users()) {
- auto *R = cast<VPRecipeBase>(U);
- for (VPValue *VPV : R->definedValues())
- assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
- }
-#endif
- }
-
- // Simplify (X && Y) || (X && !Y) -> X.
- // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
- // && (Y || Z) and (X || !X) into true. This requires queuing newly created
- // recipes to be visited during simplification.
- VPValue *X, *Y, *X1, *Y1;
- if (match(&R,
- m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
- m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
- X == X1 && Y == Y1) {
- R.getVPSingleValue()->replaceAllUsesWith(X);
- R.eraseFromParent();
- return;
- }
-
- if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
-
- if (match(&R, m_Not(m_Not(m_VPValue(A)))))
- return R.getVPSingleValue()->replaceAllUsesWith(A);
-
- // Remove redundant DerviedIVs, that is 0 + A * 1 -> A and 0 + 0 * x -> 0.
- if ((match(&R,
- m_DerivedIV(m_SpecificInt(0), m_VPValue(A), m_SpecificInt(1))) ||
- match(&R,
- m_DerivedIV(m_SpecificInt(0), m_SpecificInt(0), m_VPValue()))) &&
- TypeInfo.inferScalarType(R.getOperand(1)) ==
- TypeInfo.inferScalarType(R.getVPSingleValue()))
- return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
-}
-
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
@@ -1108,19 +1121,6 @@ static void licm(VPlan &Plan) {
}
}
-/// Try to simplify the recipes in \p Plan.
-static void simplifyRecipes(VPlan &Plan) {
- ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
- Plan.getEntry());
- Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
- VPTypeAnalysis TypeInfo(CanonicalIVType);
- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
- }
- }
-}
-
void VPlanTransforms::truncateToMinimalBitwidths(
VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs) {
#ifndef NDEBUG
More information about the llvm-commits
mailing list