[llvm-branch-commits] [llvm] [LV] NFCI: Create VPExpressions in transformToPartialReductions. (PR #182863)

Sander de Smalen via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 4 00:49:14 PST 2026


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/182863

>From 5048f9f138a9f5a358e9398ac7dd0e9f6755cbdb Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 23 Feb 2026 09:41:16 +0000
Subject: [PATCH] [LV] NFCI: Create VPExpressions in
 transformToPartialReductions.

With this change, all logic for generating partial reductions and
recognising them as VPExpressions is contained in
`transformToPartialReductions`, without the need for a second
transform pass.
This PR is intended to be a non-functional change.
---
 llvm/lib/Transforms/Vectorize/VPlan.h         |  3 +
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 68 +++++++++++++++----
 2 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 97e9c64d6481d..870bc72b49b43 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3291,6 +3291,9 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
     /// Represent an inloop multiply-accumulate reduction, multiplying the
     /// extended vector operands, negating the multiplication, performing a
     /// reduction.add on the result, and adding the scalar result to a chain.
+    /// FIXME: This one can be removed, because during codegen the extends
+    /// cannot be folded into the expression's operands, as described in
+    /// https://github.com/llvm/llvm-project/pull/178919.
     ExtNegatedMulAccReduction,
   };
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2a1b4d7f2246b..b8ebf91ec5288 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4291,12 +4291,6 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx,
               cast<VPWidenCastRecipe>(VecOp)->computeCost(VF, Ctx);
           InstructionCost RedCost = Red->computeCost(VF, Ctx);
 
-          // For partial reductions, the decision has already been
-          // made at the point of transforming reductions -> partial
-          // reductions for a given plan, based on the cost-model.
-          if (Red->isPartialReduction())
-            return true;
-
           // TTI::getExtendedReductionCost for in-loop reductions
           // only supports integer types.
           if (RedTy->isFloatingPointTy())
@@ -4353,14 +4347,8 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
               Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
           InstructionCost MulAccCost;
 
-          // For partial reductions, the decision has already been
-          // made at the point of transforming reductions -> partial
-          // reductions for a given plan, based on the cost-model.
-          if (Red->isPartialReduction())
-            return true;
-
-          // Only partial reductions support mixed or floating-point extends
-          // at the moment.
+          // getMulAccReductionCost for in-loop reductions does not support
+          // mixed or floating-point extends.
           if (Ext0 && Ext1 &&
               (Ext0->getOpcode() != Ext1->getOpcode() ||
                Ext0->getOpcode() == Instruction::CastOps::FPExt))
@@ -4428,7 +4416,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
                                                  VPWidenCastRecipe *&ExtB,
                                                  VPValue *&ValB,
                                                  VPWidenRecipe *Mul) {
-    if (!ExtA || ExtB || !isa<VPIRValue>(ValB) || Red->isPartialReduction())
+    if (!ExtA || ExtB || !isa<VPIRValue>(ValB))
       return;
     Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
     Instruction::CastOps ExtOpc = ExtA->getOpcode();
@@ -4525,6 +4513,11 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
 static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red,
                                                VPCostContext &Ctx,
                                                VFRange &Range) {
+  // Creation of VPExpressions for partial reductions is entirely handled in
+  // `transformToPartialReduction`.
+  if (Red->isPartialReduction())
+    return;
+
   VPExpressionRecipe *AbstractR = nullptr;
   auto IP = std::next(Red->getIterator());
   auto *VPBB = Red->getParent();
@@ -5884,6 +5877,45 @@ optimizeExtendsForPartialReduction(VPSingleDefRecipe *BinOp,
   return BinOp;
 }
 
+static VPExpressionRecipe *
+createPartialReductionExpression(VPReductionRecipe *Red) {
+  VPValue *VecOp = Red->getVecOp();
+
+  // Case 1: reduce.[f]add(ext(op)), where ext is a widened zext/sext/fpext.
+  //  -> bundle the extend and the reduction: VPExpressionRecipe(ext, red)
+  if (isa<VPWidenCastRecipe>(VecOp) &&
+      (match(VecOp, m_ZExtOrSExt(m_VPValue())) ||
+       match(VecOp, m_FPExt(m_VPValue()))))
+    return new VPExpressionRecipe(cast<VPWidenCastRecipe>(VecOp), Red);
+
+  // Case 2: reduce.[f]add([f]mul(ext(a), ext(b))), both operands extended.
+  //  -> bundle extends, multiply and reduction: (extA, extB, mul, red)
+  if (match(VecOp, m_FMul(m_FPExt(m_VPValue()), m_FPExt(m_VPValue()))) ||
+      match(VecOp,
+            m_Mul(m_ZExtOrSExt(m_VPValue()), m_ZExtOrSExt(m_VPValue())))) {
+    auto *Mul = cast<VPWidenRecipe>(VecOp);
+    auto *ExtA = cast<VPWidenCastRecipe>(Mul->getOperand(0));
+    auto *ExtB = cast<VPWidenCastRecipe>(Mul->getOperand(1));
+    return new VPExpressionRecipe(ExtA, ExtB, Mul, Red);
+  }
+
+  // Case 3: reduce.add(sub(0, mul(ext(a), ext(b)))), i.e. a negated multiply.
+  //  -> VPExpressionRecipe(extA, extB, mul, sub, red)
+  if (match(VecOp, m_Sub(m_ZeroInt(), m_Mul(m_ZExtOrSExt(m_VPValue()),
+                                            m_ZExtOrSExt(m_VPValue()))))) {
+    // FIXME: This should be removed, because codegen can't optimally
+    // generate code for this. However, for complex-dot products we
+    // rely on it.
+    auto *Sub = cast<VPWidenRecipe>(VecOp);
+    auto *Mul = cast<VPWidenRecipe>(Sub->getOperand(1));
+    auto *ExtA = cast<VPWidenCastRecipe>(Mul->getOperand(0));
+    auto *ExtB = cast<VPWidenCastRecipe>(Mul->getOperand(1));
+    return new VPExpressionRecipe(ExtA, ExtB, Mul, Sub, Red);
+  }
+
+  return nullptr;
+}
+
 // Helper to transform a partial reduction chain into a partial reduction
 // recipe. Assumes profitability has been checked.
 static void transformToPartialReduction(const VPPartialReductionChain &Chain,
@@ -5958,6 +5990,12 @@ static void transformToPartialReduction(const VPPartialReductionChain &Chain,
     ExitValue->replaceAllUsesWith(PartialRed);
   WidenRecipe->replaceAllUsesWith(PartialRed);
 
+  // For cost-model purposes, see if we can fold this into a VPExpression.
+  if (VPExpressionRecipe *E = createPartialReductionExpression(PartialRed)) {
+    E->insertBefore(WidenRecipe);
+    PartialRed->replaceAllUsesWith(E);
+  }
+
   // We only need to update the PHI node once, which is when we find the
   // last reduction in the chain.
   if (!IsLastInChain)



More information about the llvm-branch-commits mailing list