[llvm] [VPlan] Impl VPlan-based pattern match for ExtendedRed and MulAccRed (PR #113903)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 07:14:38 PDT 2025
================
@@ -2548,6 +2630,172 @@ void VPlanTransforms::handleUncountableEarlyExit(
LatchExitingBranch->eraseFromParent();
}
+/// Tries to convert the in-loop reduction \p Red of a zext/sext operand into a
+/// VPExtendedReductionRecipe, clamping \p Range, if that is valid and cheaper
+/// than the separate extend and reduction; returns nullptr otherwise. The
+/// created recipe must be lowered to concrete recipes before execution.
+static VPExtendedReductionRecipe *
+tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
+ VFRange &Range) {
+ using namespace VPlanPatternMatch;
+
+ Type *RedTy = Ctx.Types.inferScalarType(Red);
+ VPValue *VecOp = Red->getVecOp();
+
+ // Test if the extended reduction is valid and cheaper than extend + reduction; clamps the range.
+ auto IsExtendedRedValidAndClampRange = [&](unsigned Opcode, bool isZExt,
+ Type *SrcTy) -> bool {
+ return LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+ Opcode, isZExt, RedTy, SrcVecTy, Red->getFastMathFlags(),
+ CostKind);
+ InstructionCost ExtCost =
+ cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx); // Only invoked after VecOp matched an extend below.
+ InstructionCost RedCost = Red->computeCost(VF, Ctx);
+ return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
+ },
+ Range);
+ };
+
+ VPValue *A;
+ // Match reduce(ext(A)).
+ if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) &&
+ IsExtendedRedValidAndClampRange(
+ RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()),
+ cast<VPWidenCastRecipe>(VecOp)->getOpcode() ==
+ Instruction::CastOps::ZExt,
+ Ctx.Types.inferScalarType(A)))
+ return new VPExtendedReductionRecipe(Red, cast<VPWidenCastRecipe>(VecOp));
+
+ return nullptr;
+}
+
+/// Tries to convert the in-loop add reduction \p Red of a multiply into a
+/// VPMulAccumulateReductionRecipe, clamping \p Range, if that is valid and
+/// beneficial. The created VPMulAccumulateReductionRecipe must be lowered to
+/// concrete recipes before execution. Patterns of MulAccumulateReduction:
+/// reduce.add(mul(...)),
+/// reduce.add(mul(ext(A), ext(B))),
+/// reduce.add(ext(mul(ext(A), ext(B)))).
+static VPMulAccumulateReductionRecipe *
+tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
+ VPCostContext &Ctx, VFRange &Range) {
+ using namespace VPlanPatternMatch;
+
+ Type *RedTy = Ctx.Types.inferScalarType(Red);
+
+ // Test if using multiply-accumulate-reduction is beneficial and clamp the
+ // range. Ext0, Ext1 and OuterExt may be null; only non-null ones are costed.
+ auto IsMulAccValidAndClampRange =
+ [&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
+ VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool {
+ return LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ Type *SrcTy =
+ Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
+ auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
+ InstructionCost MulAccCost =
+ Ctx.TTI.getMulAccReductionCost(isZExt, RedTy, SrcVecTy, CostKind);
+ InstructionCost MulCost = Mul->computeCost(VF, Ctx);
+ InstructionCost RedCost = Red->computeCost(VF, Ctx);
+ InstructionCost ExtCost = 0;
+ if (Ext0)
+ ExtCost += Ext0->computeCost(VF, Ctx);
+ if (Ext1)
+ ExtCost += Ext1->computeCost(VF, Ctx);
+ if (OuterExt)
+ ExtCost += OuterExt->computeCost(VF, Ctx);
+
+ return MulAccCost.isValid() &&
+ MulAccCost < ExtCost + MulCost + RedCost;
+ },
+ Range);
+ };
+
+ unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
+ if (Opcode != Instruction::Add)
+ return nullptr; // Only add reductions are considered.
+
+ VPValue *VecOp = Red->getVecOp();
+ VPValue *A, *B;
+ // Try to match reduce.add(mul(...)).
+ if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
+ auto *RecipeA =
+ dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
+ auto *RecipeB =
+ dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
+ auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
+
+ // Match reduce.add(mul(ext, ext)); both extends must have the same opcode.
+ if (RecipeA && RecipeB &&
+ (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
+ match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
+ match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
+ IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
+ Instruction::CastOps::ZExt,
+ Mul, RecipeA, RecipeB, nullptr))
+ return new VPMulAccumulateReductionRecipe(Red, Mul, RecipeA, RecipeB,
+ RecipeA->getResultType());
+ // Match reduce.add(mul) without extends; the cost query then uses RedTy as the source type.
+ if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
+ return new VPMulAccumulateReductionRecipe(Red, Mul);
+ }
+ // Match reduce.add(ext(mul(ext(A), ext(B)))).
+ // The inner extends must have the same opcode, and the outer extend must
+ // match it unless both mul operands are the same recipe, e.g. reduce.add(zext(mul(sext(A), sext(A)))).
+ if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
+ m_ZExtOrSExt(m_VPValue()))))) {
+ auto *Ext = cast<VPWidenCastRecipe>(VecOp->getDefiningRecipe());
+ auto *Mul = cast<VPWidenRecipe>(Ext->getOperand(0)->getDefiningRecipe());
+ auto *Ext0 =
+ cast<VPWidenCastRecipe>(Mul->getOperand(0)->getDefiningRecipe());
+ auto *Ext1 =
+ cast<VPWidenCastRecipe>(Mul->getOperand(1)->getDefiningRecipe());
+ if ((Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
+ Ext0->getOpcode() == Ext1->getOpcode() &&
+ IsMulAccValidAndClampRange(Ext0->getOpcode() ==
+ Instruction::CastOps::ZExt,
+ Mul, Ext0, Ext1, Ext))
+ return new VPMulAccumulateReductionRecipe(Red, Mul, Ext0, Ext1,
+ Ext->getResultType());
+ }
+ return nullptr;
+}
+
+/// This function try to create abstract recipes from reduction recipe for
----------------
sdesmalen-arm wrote:
```suggestion
/// This function tries to create abstract recipes from the reduction recipe for
```
https://github.com/llvm/llvm-project/pull/113903
More information about the llvm-commits
mailing list