[libcxx-commits] [clang-tools-extra] [flang] [libc] [compiler-rt] [libcxx] [clang] [llvm] [mlir] [VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. (PR #76090)

Florian Hahn via libcxx-commits libcxx-commits at lists.llvm.org
Mon Jan 29 01:35:13 PST 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/76090

>From 7c31c8bc2acf60bd50cb6d63944ee8d4946b9638 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 4 May 2023 21:33:24 +0100
Subject: [PATCH 1/5] [VPlan] Replace VPRecieOrVPValue with VP2VP recipe
 simplification.

Move simplification of VPBlendRecipes from early VPlan construction to
VPlan-to-VPlan based recipe simplification. This simplifies initial
construction.

Note that some in-loop reduction tests are failing at the moment, due to
the reduction predicate being created after the reduction recipe. I will
provide a patch for that soon.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 99 +++++++------------
 .../Transforms/Vectorize/VPRecipeBuilder.h    | 23 ++---
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 34 ++++++-
 .../Transforms/Vectorize/VPlanTransforms.h    |  1 -
 4 files changed, 74 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f82e161fb846d18..609a927d23754b3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8256,31 +8256,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
   return nullptr;
 }
 
-VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
-                                                ArrayRef<VPValue *> Operands,
-                                                VPlanPtr &Plan) {
-  // If all incoming values are equal, the incoming VPValue can be used directly
-  // instead of creating a new VPBlendRecipe.
-  if (llvm::all_equal(Operands))
-    return Operands[0];
-
+VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
+                                           ArrayRef<VPValue *> Operands,
+                                           VPlanPtr &Plan) {
   unsigned NumIncoming = Phi->getNumIncomingValues();
-  // For in-loop reductions, we do not need to create an additional select.
-  VPValue *InLoopVal = nullptr;
-  for (unsigned In = 0; In < NumIncoming; In++) {
-    PHINode *PhiOp =
-        dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue());
-    if (PhiOp && CM.isInLoopReduction(PhiOp)) {
-      assert(!InLoopVal && "Found more than one in-loop reduction!");
-      InLoopVal = Operands[In];
-    }
-  }
-
-  assert((!InLoopVal || NumIncoming == 2) &&
-         "Found an in-loop reduction for PHI with unexpected number of "
-         "incoming values");
-  if (InLoopVal)
-    return Operands[Operands[0] == InLoopVal ? 1 : 0];
 
   // We know that all PHIs in non-header blocks are converted into selects, so
   // we don't have to worry about the insertion order and we can just use the
@@ -8292,13 +8271,13 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
   for (unsigned In = 0; In < NumIncoming; In++) {
     VPValue *EdgeMask =
         createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
-    assert((EdgeMask || NumIncoming == 1) &&
+    assert((EdgeMask || NumIncoming == 1 || Operands[In] == Operands[0]) &&
            "Multiple predecessors with one having a full mask");
     OperandsWithMask.push_back(Operands[In]);
     if (EdgeMask)
       OperandsWithMask.push_back(EdgeMask);
   }
-  return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask));
+  return new VPBlendRecipe(Phi, OperandsWithMask);
 }
 
 VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
@@ -8464,9 +8443,8 @@ void VPRecipeBuilder::fixHeaderPhis() {
   }
 }
 
-VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
-                                                       VFRange &Range,
-                                                       VPlan &Plan) {
+VPRecipeBase *VPRecipeBuilder::handleReplication(Instruction *I, VFRange &Range,
+                                                 VPlan &Plan) {
   bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
@@ -8518,14 +8496,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
 
   auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()),
                                        IsUniform, BlockInMask);
-  return toVPRecipeResult(Recipe);
+  return Recipe;
 }
 
-VPRecipeOrVPValueTy
-VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
-                                        ArrayRef<VPValue *> Operands,
-                                        VFRange &Range, VPBasicBlock *VPBB,
-                                        VPlanPtr &Plan) {
+VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
+    Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
+    VPBasicBlock *VPBB, VPlanPtr &Plan) {
   // First, check for specific widening recipes that deal with inductions, Phi
   // nodes, calls and memory operations.
   VPRecipeBase *Recipe;
@@ -8538,7 +8514,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
     recordRecipeOf(Phi);
 
     if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
-      return toVPRecipeResult(Recipe);
+      return Recipe;
 
     VPHeaderPHIRecipe *PhiRecipe = nullptr;
     assert((Legal->isReductionVariable(Phi) ||
@@ -8570,13 +8546,13 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
       recordRecipeOf(Inc);
 
     PhisToFix.push_back(PhiRecipe);
-    return toVPRecipeResult(PhiRecipe);
+    return PhiRecipe;
   }
 
   if (isa<TruncInst>(Instr) &&
       (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
                                                Range, *Plan)))
-    return toVPRecipeResult(Recipe);
+    return Recipe;
 
   // All widen recipes below deal only with VF > 1.
   if (LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8584,29 +8560,29 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
     return nullptr;
 
   if (auto *CI = dyn_cast<CallInst>(Instr))
-    return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));
+    return tryToWidenCall(CI, Operands, Range, Plan);
 
   if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
-    return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
+    return tryToWidenMemory(Instr, Operands, Range, Plan);
 
   if (!shouldWiden(Instr, Range))
     return nullptr;
 
   if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
-    return toVPRecipeResult(new VPWidenGEPRecipe(
-        GEP, make_range(Operands.begin(), Operands.end())));
+    return new VPWidenGEPRecipe(GEP,
+                                make_range(Operands.begin(), Operands.end()));
 
   if (auto *SI = dyn_cast<SelectInst>(Instr)) {
-    return toVPRecipeResult(new VPWidenSelectRecipe(
-        *SI, make_range(Operands.begin(), Operands.end())));
+    return new VPWidenSelectRecipe(
+        *SI, make_range(Operands.begin(), Operands.end()));
   }
 
   if (auto *CI = dyn_cast<CastInst>(Instr)) {
-    return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
-                                                  CI->getType(), *CI));
+    return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(),
+                                 *CI);
   }
 
-  return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
+  return tryToWiden(Instr, Operands, VPBB, Plan);
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
@@ -8786,22 +8762,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
           Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
         continue;
 
-      auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
+      VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe(
           Instr, Operands, Range, VPBB, Plan);
-      if (!RecipeOrValue)
-        RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan);
-      // If Instr can be simplified to an existing VPValue, use it.
-      if (isa<VPValue *>(RecipeOrValue)) {
-        auto *VPV = cast<VPValue *>(RecipeOrValue);
-        Plan->addVPValue(Instr, VPV);
-        // If the re-used value is a recipe, register the recipe for the
-        // instruction, in case the recipe for Instr needs to be recorded.
-        if (VPRecipeBase *R = VPV->getDefiningRecipe())
-          RecipeBuilder.setRecipe(Instr, R);
-        continue;
-      }
-      // Otherwise, add the new recipe.
-      VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
+      if (!Recipe)
+        Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan);
       for (auto *Def : Recipe->definedValues()) {
         auto *UV = Def->getUnderlyingValue();
         Plan->addVPValue(UV, Def);
@@ -8966,10 +8930,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
 }
 
 // Adjust the recipes for reductions. For in-loop reductions the chain of
-// instructions leading from the loop exit instr to the phi need to be converted
-// to reductions, with one operand being vector and the other being the scalar
-// reduction chain. For other reductions, a select is introduced between the phi
-// and live-out recipes when folding the tail.
+// instructions leading from the loop exit instr to the phi need to be
+// converted to reductions, with one operand being vector and the other
+// being the scalar reduction chain. For other reductions, a select is
+// introduced between the phi and live-out recipes when folding the tail.
 void LoopVectorizationPlanner::adjustRecipesForReductions(
     VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
     ElementCount MinVF) {
@@ -9079,6 +9043,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
         LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
         VecOp = FMulRecipe;
       } else {
+        if (PhiR->isInLoop() && isa<VPBlendRecipe>(CurrentLink))
+          continue;
+
         if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
           if (isa<VPWidenRecipe>(CurrentLink)) {
             assert(isa<CmpInst>(CurrentLinkI) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 7ff6749a09089e9..0d8d7fc6f817709 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -21,8 +21,6 @@ class LoopVectorizationLegality;
 class LoopVectorizationCostModel;
 class TargetLibraryInfo;
 
-using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>;
-
 /// Helper class to create VPRecipies from IR instructions.
 class VPRecipeBuilder {
   /// The loop that we evaluate.
@@ -88,8 +86,8 @@ class VPRecipeBuilder {
   /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
   /// into a sequence of select instructions as the vectorizer currently
   /// performs full if-conversion.
-  VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
-                                 VPlanPtr &Plan);
+  VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
+                            VPlanPtr &Plan);
 
   /// Handle call instructions. If \p CI can be widened for \p Range.Start,
   /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
@@ -103,9 +101,6 @@ class VPRecipeBuilder {
   VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
                            VPBasicBlock *VPBB, VPlanPtr &Plan);
 
-  /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue.
-  VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; }
-
 public:
   VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
                   LoopVectorizationLegality *Legal,
@@ -116,12 +111,11 @@ class VPRecipeBuilder {
 
   /// Check if an existing VPValue can be used for \p Instr or a recipe can be
   /// create for \p I withing the given VF \p Range. If an existing VPValue can
-  /// be used or if a recipe can be created, return it. Otherwise return a
-  /// VPRecipeOrVPValueTy with nullptr.
-  VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr,
-                                             ArrayRef<VPValue *> Operands,
-                                             VFRange &Range, VPBasicBlock *VPBB,
-                                             VPlanPtr &Plan);
+  /// be used or if a recipe can be created, return it.
+  VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
+                                       ArrayRef<VPValue *> Operands,
+                                       VFRange &Range, VPBasicBlock *VPBB,
+                                       VPlanPtr &Plan);
 
   /// Set the recipe created for given ingredient. This operation is a no-op for
   /// ingredients that were not marked using a nullptr entry in the map.
@@ -165,8 +159,7 @@ class VPRecipeBuilder {
   /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as
   /// last operand. Range.End may be decreased to ensure same recipe behavior
   /// from \p Range.Start to \p Range.End.
-  VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range,
-                                        VPlan &Plan);
+  VPRecipeBase *handleReplication(Instruction *I, VFRange &Range, VPlan &Plan);
 
   /// Add the incoming values from the backedge to reduction & first-order
   /// recurrence cross-iteration phis.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 33132880d5a444d..317d7806bf55119 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -806,6 +806,38 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
 
 /// Try to simplify recipe \p R.
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
+  // Try to remove redundant blend recipes.
+  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+    if (Blend->getNumIncomingValues() == 1) {
+      Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+      Blend->eraseFromParent();
+      return;
+    }
+
+    bool AllEqual = true;
+    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
+      AllEqual &= Blend->getIncomingValue(0) == Blend->getIncomingValue(I);
+    if (AllEqual) {
+      Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+      Blend->eraseFromParent();
+      return;
+    }
+    if (Blend->getNumIncomingValues() != 2)
+      return;
+    auto IsInLoopReduction = [](VPValue *VPV) {
+      auto *PhiR = dyn_cast<VPReductionPHIRecipe>(VPV);
+      return PhiR && PhiR->isInLoop();
+    };
+    if (IsInLoopReduction(Blend->getIncomingValue(0))) {
+      Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
+      Blend->eraseFromParent();
+    } else if (IsInLoopReduction(Blend->getIncomingValue(1))) {
+      Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+      Blend->eraseFromParent();
+    }
+    return;
+  }
+
   switch (getOpcodeForRecipe(R)) {
   case Instruction::Mul: {
     VPValue *A = R.getOperand(0);
@@ -996,8 +1028,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
   removeRedundantCanonicalIVs(Plan);
   removeRedundantInductionCasts(Plan);
 
-  optimizeInductions(Plan, SE);
   simplifyRecipes(Plan, SE.getContext());
+  optimizeInductions(Plan, SE);
   removeDeadRecipes(Plan);
 
   createAndOptimizeReplicateRegions(Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 3bf91115debb7d5..7ce2e5974f60b02 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -112,7 +112,6 @@ struct VPlanTransforms {
   /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
   /// them with already existing recipes expanding the same SCEV expression.
   static void removeRedundantExpandSCEVRecipes(VPlan &Plan);
-
 };
 
 } // namespace llvm

>From 96fcdc546bb06a9ec545f64339c7843a903f2198 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 16 Jan 2024 22:01:23 +0000
Subject: [PATCH 2/5] !fixup address latest comments, thanks

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 11 ++++++++++-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 19 -------------------
 2 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index acfe0138c64e7c5..8907fda56025df5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8963,8 +8963,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
         LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
         VecOp = FMulRecipe;
       } else {
-        if (PhiR->isInLoop() && isa<VPBlendRecipe>(CurrentLink))
+        auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
+        if (PhiR->isInLoop() && Blend) {
+          assert(Blend->getNumIncomingValues() == 2);
+          assert(any_of(Blend->operands(),
+                        [PhiR](VPValue *Op) { return Op == PhiR; }));
+          if (Blend->getIncomingValue(0) == PhiR)
+            Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
+          else
+            Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
           continue;
+        }
 
         if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
           if (isa<VPWidenRecipe>(CurrentLink)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 19223a4da67b3d1..dc41454e48fd7df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -808,12 +808,6 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   // Try to remove redundant blend recipes.
   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
-    if (Blend->getNumIncomingValues() == 1) {
-      Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
-      Blend->eraseFromParent();
-      return;
-    }
-
     bool AllEqual = true;
     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
       AllEqual &= Blend->getIncomingValue(0) == Blend->getIncomingValue(I);
@@ -822,19 +816,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
       Blend->eraseFromParent();
       return;
     }
-    if (Blend->getNumIncomingValues() != 2)
-      return;
-    auto IsInLoopReduction = [](VPValue *VPV) {
-      auto *PhiR = dyn_cast<VPReductionPHIRecipe>(VPV);
-      return PhiR && PhiR->isInLoop();
-    };
-    if (IsInLoopReduction(Blend->getIncomingValue(0))) {
-      Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
-      Blend->eraseFromParent();
-    } else if (IsInLoopReduction(Blend->getIncomingValue(1))) {
-      Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
-      Blend->eraseFromParent();
-    }
     return;
   }
 

>From 8627dd4c97f5be9857becdd9081779c2644be0fb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 16 Jan 2024 22:03:42 +0000
Subject: [PATCH 3/5] !fiuxp re-add newline.

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 7ce2e5974f60b02..3bf91115debb7d5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -112,6 +112,7 @@ struct VPlanTransforms {
   /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
   /// them with already existing recipes expanding the same SCEV expression.
   static void removeRedundantExpandSCEVRecipes(VPlan &Plan);
+
 };
 
 } // namespace llvm

>From ba5e7870cbb77dba917b799a4b419755a1922db6 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 28 Jan 2024 13:21:35 +0000
Subject: [PATCH 4/5] !fixup address latest comments, thanks!

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 46 +++++++++++--------
 .../Transforms/Vectorize/VPRecipeBuilder.h    | 31 +++++++------
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 11 ++---
 3 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8394948fc09e0d0..d914cbc7e4d5c93 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8101,10 +8101,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
   BlockMaskCache[BB] = BlockMask;
 }
 
-VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
-                                                ArrayRef<VPValue *> Operands,
-                                                VFRange &Range,
-                                                VPlanPtr &Plan) {
+VPWidenMemoryInstructionRecipe *
+VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
+                                  VFRange &Range, VPlanPtr &Plan) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
          "Must be called with either a load or store");
 
@@ -8176,7 +8175,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
   return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
 }
 
-VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
+VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
     PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
 
   // Check if this is an integer or fp induction. If so, build the recipe that
@@ -8241,13 +8240,16 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
   SmallVector<VPValue *, 2> OperandsWithMask;
 
   for (unsigned In = 0; In < NumIncoming; In++) {
+    OperandsWithMask.push_back(Operands[In]);
     VPValue *EdgeMask =
         createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
-    assert((EdgeMask || NumIncoming == 1 || Operands[In] == Operands[0]) &&
-           "Multiple predecessors with one having a full mask");
-    OperandsWithMask.push_back(Operands[In]);
-    if (EdgeMask)
-      OperandsWithMask.push_back(EdgeMask);
+    if (!EdgeMask) {
+      assert(In == 0 && "Both null and non-null edge masks found");
+      assert(all_equal(Operands) &&
+             "Distinct incoming values with one having a full mask");
+      break;
+    }
+    OperandsWithMask.push_back(EdgeMask);
   }
   return new VPBlendRecipe(Phi, OperandsWithMask);
 }
@@ -8358,9 +8360,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
                                                              Range);
 }
 
-VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
-                                          ArrayRef<VPValue *> Operands,
-                                          VPBasicBlock *VPBB, VPlanPtr &Plan) {
+VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
+                                           ArrayRef<VPValue *> Operands,
+                                           VPBasicBlock *VPBB, VPlanPtr &Plan) {
   switch (I->getOpcode()) {
   default:
     return nullptr;
@@ -8417,8 +8419,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
   }
 }
 
-VPRecipeBase *VPRecipeBuilder::handleReplication(Instruction *I, VFRange &Range,
-                                                 VPlan &Plan) {
+VPReplicateRecipe *VPRecipeBuilder::handleReplication(Instruction *I,
+                                                      VFRange &Range,
+                                                      VPlan &Plan) {
   bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
       [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
       Range);
@@ -8991,7 +8994,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     // the phi until LoopExitValue. We keep track of the previous item
     // (PreviousLink) to tell which of the two operands of a Link will remain
     // scalar and which will be reduced. For minmax by select(cmp), Link will be
-    // the select instructions.
+    // the select instructions. Blend recipes will get folded to their non-phi
+    // operand, as the reduction recipe handles the condition directly.
     VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
     for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) {
       Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
@@ -9024,13 +9028,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       } else {
         auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
         if (PhiR->isInLoop() && Blend) {
-          assert(Blend->getNumIncomingValues() == 2);
-          assert(any_of(Blend->operands(),
-                        [PhiR](VPValue *Op) { return Op == PhiR; }));
+          assert(Blend->getNumIncomingValues() == 2 &&
+                 "Blend must have 2 incoming values");
           if (Blend->getIncomingValue(0) == PhiR)
             Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
-          else
+          else {
             Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+            assert(Blend->getIncomingValue(1) == PhiR &&
+                   "PhiR must be an operand of the blend");
+          }
           continue;
         }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index edb8b67965cb2ea..7cf05c8d471cea7 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -67,14 +67,16 @@ class VPRecipeBuilder {
   /// Check if the load or store instruction \p I should widened for \p
   /// Range.Start and potentially masked. Such instructions are handled by a
   /// recipe that takes an additional VPInstruction for the mask.
-  VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
-                                 VFRange &Range, VPlanPtr &Plan);
+  VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
+                                                   ArrayRef<VPValue *> Operands,
+                                                   VFRange &Range,
+                                                   VPlanPtr &Plan);
 
   /// Check if an induction recipe should be constructed for \p Phi. If so build
   /// and return it. If not, return null.
-  VPRecipeBase *tryToOptimizeInductionPHI(PHINode *Phi,
-                                          ArrayRef<VPValue *> Operands,
-                                          VPlan &Plan, VFRange &Range);
+  VPHeaderPHIRecipe *tryToOptimizeInductionPHI(PHINode *Phi,
+                                               ArrayRef<VPValue *> Operands,
+                                               VPlan &Plan, VFRange &Range);
 
   /// Optimize the special case where the operand of \p I is a constant integer
   /// induction variable.
@@ -82,10 +84,9 @@ class VPRecipeBuilder {
   tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
                                  VFRange &Range, VPlan &Plan);
 
-  /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match
-  /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned
-  /// into a sequence of select instructions as the vectorizer currently
-  /// performs full if-conversion.
+  /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
+  /// all such phi nodes are turned into a sequence of select instructions as
+  /// the vectorizer currently performs full if-conversion.
   VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands,
                             VPlanPtr &Plan);
 
@@ -98,8 +99,8 @@ class VPRecipeBuilder {
   /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
   /// if it can. The function should only be called if the cost-model indicates
   /// that widening should be performed.
-  VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
-                           VPBasicBlock *VPBB, VPlanPtr &Plan);
+  VPWidenRecipe *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands,
+                            VPBasicBlock *VPBB, VPlanPtr &Plan);
 
 public:
   VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
@@ -109,9 +110,8 @@ class VPRecipeBuilder {
       : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
         Builder(Builder) {}
 
-  /// Check if an existing VPValue can be used for \p Instr or a recipe can be
-  /// create for \p I withing the given VF \p Range. If an existing VPValue can
-  /// be used or if a recipe can be created, return it.
+  /// Create and return a widened recipe for \p I if one can be created within
+  /// the given VF \p Range.
   VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr,
                                        ArrayRef<VPValue *> Operands,
                                        VFRange &Range, VPBasicBlock *VPBB,
@@ -162,7 +162,8 @@ class VPRecipeBuilder {
   /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as
   /// last operand. Range.End may be decreased to ensure same recipe behavior
   /// from \p Range.Start to \p Range.End.
-  VPRecipeBase *handleReplication(Instruction *I, VFRange &Range, VPlan &Plan);
+  VPReplicateRecipe *handleReplication(Instruction *I, VFRange &Range,
+                                       VPlan &Plan);
 
   /// Add the incoming values from the backedge to reduction & first-order
   /// recurrence cross-iteration phis.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2d502ab0e83f4e0..4ce645006b14c60 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -829,14 +829,11 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   // Try to remove redundant blend recipes.
   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
-    bool AllEqual = true;
     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-      AllEqual &= Blend->getIncomingValue(0) == Blend->getIncomingValue(I);
-    if (AllEqual) {
-      Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
-      Blend->eraseFromParent();
-      return;
-    }
+      if (Blend->getIncomingValue(0) != Blend->getIncomingValue(I))
+        return;
+    Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+    Blend->eraseFromParent();
     return;
   }
 

>From 42fa0a0e5a57b030a96926a4ee33eb95614a3bb2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 29 Jan 2024 09:33:28 +0000
Subject: [PATCH 5/5] !fixup address latest comments, thanks!

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp   | 7 ++++---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 586baa95cafd989..3483e2c968e6be3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9008,8 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     // the phi until LoopExitValue. We keep track of the previous item
     // (PreviousLink) to tell which of the two operands of a Link will remain
     // scalar and which will be reduced. For minmax by select(cmp), Link will be
-    // the select instructions. Blend recipes will get folded to their non-phi
-    // operand, as the reduction recipe handles the condition directly.
+    // the select instructions. Blend recipes of in-loop reduction phi's  will
+    // get folded to their non-phi operand, as the reduction recipe handles the
+    // condition directly.
     VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
     for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef().drop_front()) {
       Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
@@ -9047,9 +9048,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
           if (Blend->getIncomingValue(0) == PhiR)
             Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
           else {
-            Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
             assert(Blend->getIncomingValue(1) == PhiR &&
                    "PhiR must be an operand of the blend");
+            Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
           }
           continue;
         }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4ce645006b14c60..5d7ac2bf9bb05f1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -829,10 +829,11 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) {
 static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
   // Try to remove redundant blend recipes.
   if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
+    VPValue *Inc0 = Blend->getIncomingValue(0);
     for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
-      if (Blend->getIncomingValue(0) != Blend->getIncomingValue(I))
+      if (Inc0 != Blend->getIncomingValue(I))
         return;
-    Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
+    Blend->replaceAllUsesWith(Inc0);
     Blend->eraseFromParent();
     return;
   }



More information about the libcxx-commits mailing list