[llvm] LoopVectorize: Use a better heuristic for epilogue branch weights (PR #72589)

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 16 16:56:48 PST 2023


https://github.com/MatzeB created https://github.com/llvm/llvm-project/pull/72589

This is a follow-up to PR #72450 correcting the branch_weights used
for the epilogue loop header: Estimate the average loop trip-count of
the epilogue block as `0.5 * "unroll factor" * "vectorization width"`
and set the branch_weights accordingly.


>From d668722c3e07ff9bbcb927e9a886f696a561746a Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Thu, 16 Nov 2023 14:17:03 -0800
Subject: [PATCH] LoopVectorize: Use a better heuristic for epilogue branch
 weights

This is a follow-up to PR #72450 correcting the branch_weights used
for the epilogue loop header: Estimate the average loop trip-count of
the epilogue block as `0.5 * "unroll factor" * "vectorization width"`
and set the branch_weights accordingly.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 20 +++++++++++--------
 .../LoopVectorize/branch-weights.ll           |  3 ++-
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0a10f0d6471769a..750a3dcdc4b4cf0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -407,9 +407,6 @@ static constexpr uint32_t MemCheckBypassWeights[] = {1, 127};
 // Likelyhood of bypassing the vectorized loop because there are zero trips left
 // after prolog. See `emitIterationCountCheck`.
 static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
-// Likelyhood of bypassing the vectorized loop because of zero trips necessary.
-// See `emitMinimumVectorEpilogueIterCountCheck`.
-static constexpr uint32_t EpilogueMinItersBypassWeights[] = {1, 127};
 
 /// A helper function that returns true if the given type is irregular. The
 /// type is irregular if its allocated size doesn't equal the store size of an
@@ -3163,9 +3160,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
       // Assume that `Count % VectorTripCount` is equally distributed.
       unsigned TripCount = UF * VF.getKnownMinValue();
       assert(TripCount > 0 && "trip count should not be zero");
-      MDBuilder MDB(ScalarLatchTerm->getContext());
-      MDNode *BranchWeights = MDB.createBranchWeights(1, TripCount - 1);
-      BI.setMetadata(LLVMContext::MD_prof, BranchWeights);
+      uint32_t Weights[] = {1, TripCount - 1};
+      setBranchWeights(BI, Weights);
     }
   }
 
@@ -8079,8 +8075,16 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
 
   BranchInst &BI =
       *BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
-  if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
-    setBranchWeights(BI, EpilogueMinItersBypassWeights);
+  if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+    // Assume the tripcount for the epilogue loop is equally distributed between
+    // 0 and `VectorTripCount - 1` leaving us with a `VectorTripCount * 0.5`
+    // branch weight.
+    unsigned CombinedFactor = UF * VF.getKnownMinValue();
+    assert(CombinedFactor > 1 && "UF times VF should be bigger than one");
+    unsigned EstimatedTripCount = (CombinedFactor + 1) / 2;
+    uint32_t Weights[] = {1, EstimatedTripCount};
+    setBranchWeights(BI, Weights);
+  }
   ReplaceInstWithInst(Insert->getTerminator(), &BI);
 
   LoopBypassBlocks.push_back(Insert);
diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
index 36ff8e83b0ed124..c39369f2fa3b171 100644
--- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll
@@ -24,7 +24,7 @@
 ; CHECK:   br i1 %cmp.n, label %exit.loopexit, label %vec.epilog.iter.check, !prof [[PROF_F0_MIDDLE_BLOCKS:![0-9]+]]
 ;
 ; CHECK: vec.epilog.iter.check:
-; CHECK:   br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph, !prof [[PROF_F0_UNLIKELY]]
+; CHECK:   br i1 %min.epilog.iters.check, label %vec.epilog.scalar.ph, label %vec.epilog.ph, !prof [[PROF_F0_EPILOGUE_HEADER:![0-9]+]]
 ;
 ; CHECK: vec.epilog.ph:
 ; CHECK:   br label %vec.epilog.vector.body
@@ -77,5 +77,6 @@ exit:
 ; CHECK: [[PROF_F0_UNLIKELY]] = !{!"branch_weights", i32 1, i32 127}
 ; CEHCK: [[PROF_F0_VECTOR_BODY]] = !{!"branch_weights", i32 1, i32 307}
 ; CHECK: [[PROF_F0_MIDDLE_BLOCKS]] =  !{!"branch_weights", i32 1, i32 3}
+; CHECK: [[PROF_F0_EPILOGUE_HEADER]] = !{!"branch_weights", i32 1, i32 2}
 ; CHECK: [[PROF_F0_VEC_EPILOG_VECTOR_BODY]] = !{!"branch_weights", i32 0, i32 0}
 ; CEHCK: [[PROF_F0_LOOP]] = !{!"branch_weights", i32 2, i32 1}



More information about the llvm-commits mailing list