[llvm] [VPlan] Update getBestPlan to return VF, use also for epilogue vec. (PR #98821)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 20 04:55:06 PDT 2024


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/98821

>From f9fc443fa579217362d9e28dab6803eabd7be703 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 14 Jul 2024 16:20:41 +0100
Subject: [PATCH 1/2] [VPlan] Update getBestPlan to return VF, use also for
 epilogue vec.

Update getBestPlan to return the VF alongside the best plan instead of
restricting the returned plan's VFs to the best VF.

This is allows using getBestPlan to also get the best VPlan for epilogue
vectorization. As the same plan may be used to vectorize both the main
and epilogue loop, restricting the VF of the best plan would cause
issues.
---
 .../Vectorize/LoopVectorizationPlanner.h      |  5 +++--
 .../Transforms/Vectorize/LoopVectorize.cpp    | 20 ++++++++-----------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index c63cf0c37f2f9..62b5d270e2f04 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -365,8 +365,9 @@ class LoopVectorizationPlanner {
   /// Return the best VPlan for \p VF.
   VPlan &getBestPlanFor(ElementCount VF) const;
 
-  /// Return the most profitable plan and fix its VF to the most profitable one.
-  VPlan &getBestPlan() const;
+  /// Return the most profitable vectorization factor together with the most
+  /// profitable plan containing that vectorization factor.
+  std::pair<ElementCount, VPlan &> getBestPlan() const;
 
   /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
   /// according to the best selected \p VF and  \p UF.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7d37d67cde29c..2ea136c7ebc48 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7423,11 +7423,11 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
   return Cost;
 }
 
-VPlan &LoopVectorizationPlanner::getBestPlan() const {
+std::pair<ElementCount, VPlan &> LoopVectorizationPlanner::getBestPlan() const {
   // If there is a single VPlan with a single VF, return it directly.
   VPlan &FirstPlan = *VPlans[0];
   if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
-    return FirstPlan;
+    return {*FirstPlan.vectorFactors().begin(), FirstPlan};
 
   VPlan *BestPlan = &FirstPlan;
   ElementCount ScalarVF = ElementCount::getFixed(1);
@@ -7466,8 +7466,7 @@ VPlan &LoopVectorizationPlanner::getBestPlan() const {
       }
     }
   }
-  BestPlan->setVF(BestFactor.Width);
-  return *BestPlan;
+  return {BestFactor.Width, *BestPlan};
 }
 
 VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -10287,6 +10286,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     } else {
       // If we decided that it is *legal* to vectorize the loop, then do it.
 
+      const auto &[Width, BestPlan] = LVP.getBestPlan();
+      LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width << "\n");
+      assert(VF.Width == Width &&
+             "VPlan cost model and legacy cost model disagreed");
+
       // Consider vectorizing the epilogue too if it's profitable.
       VectorizationFactor EpilogueVF =
           LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
@@ -10395,14 +10399,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                                VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
                                PSI, Checks);
 
-        VPlan &BestPlan = LVP.getBestPlan();
-        assert(size(BestPlan.vectorFactors()) == 1 &&
-               "Plan should have a single VF");
-        ElementCount Width = *BestPlan.vectorFactors().begin();
-        LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
-                          << "\n");
-        assert(VF.Width == Width &&
-               "VPlan cost model and legacy cost model disagreed");
         LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
         ++LoopsVectorized;
 

>From 69fd80e097419738608941ad8452273cdacb2fc8 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 20 Jul 2024 10:56:01 +0100
Subject: [PATCH 2/2] !fixup update after updating to latest main.

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 32 ++++++++-----------
 .../RISCV/riscv-vector-reverse.ll             |  4 +--
 2 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f472aee34ef28..873f4b2d7db22 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10320,8 +10320,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
                                  &CM, BFI, PSI, Checks);
 
-      VPlan &BestPlan =
-          UseLegacyCostModel ? LVP.getBestPlanFor(VF.Width) : LVP.getBestPlan();
+      VPlan &BestPlan = UseLegacyCostModel ? LVP.getBestPlanFor(VF.Width)
+                                           : LVP.getBestPlan().second;
       assert((UseLegacyCostModel || BestPlan.hasScalarVFOnly()) &&
              "VPlan cost model and legacy cost model disagreed");
       LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
@@ -10335,10 +10335,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     } else {
       // If we decided that it is *legal* to vectorize the loop, then do it.
 
-      const auto &[Width, BestPlan] = LVP.getBestPlan();
-      LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width << "\n");
-      assert(VF.Width == Width &&
-             "VPlan cost model and legacy cost model disagreed");
+      ElementCount Width = VF.Width;
+      std::optional<VPlan *> VPlanFromVPCost;
+      if (!UseLegacyCostModel) {
+        const auto &[VPWidth, Plan] = LVP.getBestPlan();
+        LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
+                          << "\n");
+        Width = VPWidth;
+        assert(VF.Width == Width &&
+               "VPlan cost model and legacy cost model disagreed");
+      }
+      VPlan &BestPlan =
+          *VPlanFromVPCost.value_or(&LVP.getBestPlanFor(VF.Width));
 
       // Consider vectorizing the epilogue too if it's profitable.
       VectorizationFactor EpilogueVF =
@@ -10444,18 +10452,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         if (!MainILV.areSafetyChecksAdded())
           DisableRuntimeUnroll = true;
       } else {
-        ElementCount Width = VF.Width;
-        VPlan &BestPlan =
-            UseLegacyCostModel ? LVP.getBestPlanFor(Width) : LVP.getBestPlan();
-        if (!UseLegacyCostModel) {
-          assert(size(BestPlan.vectorFactors()) == 1 &&
-                 "Plan should have a single VF");
-          Width = *BestPlan.vectorFactors().begin();
-          LLVM_DEBUG(dbgs()
-                     << "VF picked by VPlan cost model: " << Width << "\n");
-          assert(VF.Width == Width &&
-                 "VPlan cost model and legacy cost model disagreed");
-        }
         InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, Width,
                                VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
                                PSI, Checks);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 8e9713fecf29d..7fb580be9e22e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -131,8 +131,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Not Interleaving.
 ; CHECK-NEXT:  LV: Interleaving is not beneficial.
 ; CHECK-NEXT:  LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  VF picked by VPlan cost model: vscale x 4
+; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK:       LV: Interleaving disabled by the pass manager
 ; CHECK-NEXT:  LV: Loop does not require scalar epilogue
@@ -287,8 +287,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Not Interleaving.
 ; CHECK-NEXT:  LV: Interleaving is not beneficial.
 ; CHECK-NEXT:  LV: Found a vectorizable loop (vscale x 4) in <stdin>
-; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  VF picked by VPlan cost model: vscale x 4
+; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK:       LV: Interleaving disabled by the pass manager
 ; CHECK-NEXT:  LV: Loop does not require scalar epilogue



More information about the llvm-commits mailing list