[llvm] 67a55e0 - [VPlan] Replace getBestPlan by getBestVF use also for epilogue vec. (#98821)

Fri Jul 26 06:06:50 PDT 2024

Author: Florian Hahn
Date: 2024-07-26T14:06:46+01:00
New Revision: 67a55e01e3f13d6ea5be917765a4171cd68cb5ac

URL: https://github.com/llvm/llvm-project/commit/67a55e01e3f13d6ea5be917765a4171cd68cb5ac
DIFF: https://github.com/llvm/llvm-project/commit/67a55e01e3f13d6ea5be917765a4171cd68cb5ac.diff

LOG: [VPlan] Replace getBestPlan by getBestVF use also for epilogue vec. (#98821)

Replace getBestPlan by getBestVF which simply finds the best
VF out of the VFs for the available VPlans.

Then use getBestPlan to retrieve the corresponding VPlan.

This allows using getBestVF & getBestPlan for epilogue vectorization
as well. As the same plan may be used to vectorize both the main
and epilogue loop, restricting the VF of the best plan would cause
issues.

PR: https://github.com/llvm/llvm-project/pull/98821

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index c63cf0c37f2f9..590f3153cd084 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -365,8 +365,8 @@ class LoopVectorizationPlanner {
   /// Return the best VPlan for \p VF.
   VPlan &getBestPlanFor(ElementCount VF) const;
 
-  /// Return the most profitable plan and fix its VF to the most profitable one.
-  VPlan &getBestPlan() const;
+  /// Return the most profitable vectorization factor.
+  ElementCount getBestVF() const;
 
   /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
   /// according to the best selected \p VF and  \p UF.

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea19af10a1474..9733ac073eedd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7162,13 +7162,12 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
   return Cost;
 }
 
-VPlan &LoopVectorizationPlanner::getBestPlan() const {
+ElementCount LoopVectorizationPlanner::getBestVF() const {
   // If there is a single VPlan with a single VF, return it directly.
   VPlan &FirstPlan = *VPlans[0];
   if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
-    return FirstPlan;
+    return *FirstPlan.vectorFactors().begin();
 
-  VPlan *BestPlan = &FirstPlan;
   ElementCount ScalarVF = ElementCount::getFixed(1);
   assert(hasPlanWithVF(ScalarVF) &&
          "More than a single plan/VF w/o any plan having scalar VF");
@@ -7199,14 +7198,11 @@ VPlan &LoopVectorizationPlanner::getBestPlan() const {
 
       InstructionCost Cost = cost(*P, VF);
       VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
-      if (isMoreProfitable(CurrentFactor, BestFactor)) {
+      if (isMoreProfitable(CurrentFactor, BestFactor))
         BestFactor = CurrentFactor;
-        BestPlan = &*P;
-      }
     }
   }
-  BestPlan->setVF(BestFactor.Width);
-  return *BestPlan;
+  return BestFactor.Width;
 }
 
 VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -10001,10 +9997,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
                                  &CM, BFI, PSI, Checks);
 
-      VPlan &BestPlan = LVP.getBestPlan();
-      assert(BestPlan.hasScalarVFOnly() &&
+      ElementCount BestVF = LVP.getBestVF();
+      assert(BestVF.isScalar() &&
              "VPlan cost model and legacy cost model disagreed");
-      LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
+      VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
+      LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);
 
       ORE->emit([&]() {
         return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10015,21 +10012,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     } else {
       // If we decided that it is *legal* to vectorize the loop, then do it.
 
+      ElementCount BestVF = LVP.getBestVF();
+      LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
+      assert(VF.Width == BestVF &&
+             "VPlan cost model and legacy cost model disagreed");
+      VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
       // Consider vectorizing the epilogue too if it's profitable.
       VectorizationFactor EpilogueVF =
-          LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
+          LVP.selectEpilogueVectorizationFactor(BestVF, IC);
       if (EpilogueVF.Width.isVector()) {
 
         // The first pass vectorizes the main loop and creates a scalar epilogue
         // to be vectorized by executing the plan (potentially with a 
diff erent
         // factor) again shortly afterwards.
-        EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
+        EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
         EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
                                            EPI, &LVL, &CM, BFI, PSI, Checks);
 
         assert(EPI.MainLoopVF == VF.Width && "VFs must match");
-        std::unique_ptr<VPlan> BestMainPlan(
-            LVP.getBestPlanFor(VF.Width).duplicate());
+        std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
         const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
             EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, true);
         ++LoopsVectorized;
@@ -10120,18 +10121,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         if (!MainILV.areSafetyChecksAdded())
           DisableRuntimeUnroll = true;
       } else {
-        VPlan &BestPlan = LVP.getBestPlan();
-        assert(size(BestPlan.vectorFactors()) == 1 &&
-               "Plan should have a single VF");
-        ElementCount Width = *BestPlan.vectorFactors().begin();
-        LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
-                          << "\n");
-        assert(VF.Width == Width &&
-               "VPlan cost model and legacy cost model disagreed");
-        InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, Width,
+        InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
                                VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
                                PSI, Checks);
-        LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
+        LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
         ++LoopsVectorized;
 
         // Add metadata to disable runtime unrolling a scalar loop when there

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 3a14842580425..9c9547c8b6322 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -135,8 +135,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Not Interleaving.
 ; CHECK-NEXT:  LV: Interleaving is not beneficial.
 ; CHECK-NEXT:  LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  VF picked by VPlan cost model: vscale x 4
+; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
 ; CHECK-NEXT:  Live-in vp<%0> = VF * UF
@@ -340,8 +340,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Not Interleaving.
 ; CHECK-NEXT:  LV: Interleaving is not beneficial.
 ; CHECK-NEXT:  LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  VF picked by VPlan cost model: vscale x 4
+; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
 ; CHECK-NEXT:  Live-in vp<%0> = VF * UF