[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Mar 30 18:56:58 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Ryotaro Kasuga (kasuga-fj)

<details>
<summary>Changes</summary>

The vectorization profitability has a process to check whether a given loop can be vectorized or not. Since the process is conservative, a loop that can be vectorized may be deemed not to be possible. This can trigger unnecessary exchanges.
This patch improves the profitability decision by mitigating such misjudgments. Before this patch, we considered a loop to be vectorizable only when there are no loop carried dependencies with the IV of the loop. However, a loop carried dependency doesn't prevent vectorization if the distance is positive. This patch makes the vectorization check more accurate by allowing a loop with the positive dependency. Note that it is difficult to make a complete decision whether a loop can be vectorized or not. To achieve this, we must check the vector width and the distance of dependency.

---
Full diff: https://github.com/llvm/llvm-project/pull/133672.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+103-25) 
- (modified) llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll (+3-5) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b6b0b7d7a947a..0c3a9cbfeed5f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,8 +17,8 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopCacheAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -80,6 +80,21 @@ enum class RuleTy {
   ForVectorization,
 };
 
+/// Store the information about if corresponding direction vector was negated
+/// by normalization or not. This is necessary to restore the original one from
+/// a row of a dependency matrix because we only manage normalized direction
+/// vectors. Also, duplicate vectors are eliminated, so there may be both
+/// original and negated vectors for a single entry (a row of dependency
+/// matrix). E.g., if there are two direction vectors `[< =]` and `[> =]`, the
+/// later one will be converted to the same as former one by normalization, so
+/// only `[< =]` would be retained in the final result.
+struct NegatedStatus {
+  bool Original = false;
+  bool Negated = false;
+
+  bool isNonNegativeDir(char Dir) const;
+};
+
 } // end anonymous namespace
 
 // Minimum loop depth supported.
@@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 }
 #endif
 
-static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
-                                     Loop *L, DependenceInfo *DI,
-                                     ScalarEvolution *SE,
+static bool populateDependencyMatrix(CharMatrix &DepMatrix,
+                                     std::vector<NegatedStatus> &NegStatusVec,
+                                     unsigned Level, Loop *L,
+                                     DependenceInfo *DI, ScalarEvolution *SE,
                                      OptimizationRemarkEmitter *ORE) {
   using ValueVector = SmallVector<Value *, 16>;
 
@@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
     return false;
   }
   ValueVector::iterator I, IE, J, JE;
-  StringSet<> Seen;
+
+  // Manage all found direction vectors. and map it to the index of DepMatrix.
+  StringMap<unsigned> Seen;
 
   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
     for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
         // If the direction vector is negative, normalize it to
         // make it non-negative.
-        if (D->normalize(SE))
+        bool Normalized = D->normalize(SE);
+        if (Normalized)
           LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
         LLVM_DEBUG(StringRef DepType =
                        D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
@@ -214,8 +233,17 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
         }
 
         // Make sure we only add unique entries to the dependency matrix.
-        if (Seen.insert(StringRef(Dep.data(), Dep.size())).second)
+        unsigned Index = DepMatrix.size();
+        auto [Ite, Inserted] =
+            Seen.try_emplace(StringRef(Dep.data(), Dep.size()), Index);
+        if (Inserted) {
           DepMatrix.push_back(Dep);
+          NegStatusVec.push_back(NegatedStatus{});
+        } else
+          Index = Ite->second;
+
+        NegatedStatus &Status = NegStatusVec[Index];
+        (Normalized ? Status.Negated : Status.Original) = true;
       }
     }
   }
@@ -400,6 +428,7 @@ class LoopInterchangeProfitability {
   bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
                     unsigned InnerLoopId, unsigned OuterLoopId,
                     CharMatrix &DepMatrix,
+                    const std::vector<NegatedStatus> &NegStatusVec,
                     const DenseMap<const Loop *, unsigned> &CostMap,
                     std::unique_ptr<CacheCost> &CC);
 
@@ -409,9 +438,10 @@ class LoopInterchangeProfitability {
       const DenseMap<const Loop *, unsigned> &CostMap,
       std::unique_ptr<CacheCost> &CC);
   std::optional<bool> isProfitablePerInstrOrderCost();
-  std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
-                                                   unsigned OuterLoopId,
-                                                   CharMatrix &DepMatrix);
+  std::optional<bool>
+  isProfitableForVectorization(unsigned InnerLoopId, unsigned OuterLoopId,
+                               CharMatrix &DepMatrix,
+                               const std::vector<NegatedStatus> &NegStatusVec);
   Loop *OuterLoop;
   Loop *InnerLoop;
 
@@ -503,8 +533,9 @@ struct LoopInterchange {
                       << "\n");
 
     CharMatrix DependencyMatrix;
+    std::vector<NegatedStatus> NegStatusVec;
     Loop *OuterMostLoop = *(LoopList.begin());
-    if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
+    if (!populateDependencyMatrix(DependencyMatrix, NegStatusVec, LoopNestDepth,
                                   OuterMostLoop, DI, SE, ORE)) {
       LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
       return false;
@@ -543,8 +574,8 @@ struct LoopInterchange {
     for (unsigned j = SelecLoopId; j > 0; j--) {
       bool ChangedPerIter = false;
       for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
-        bool Interchanged =
-            processLoop(LoopList, i, i - 1, DependencyMatrix, CostMap);
+        bool Interchanged = processLoop(LoopList, i, i - 1, DependencyMatrix,
+                                        NegStatusVec, CostMap);
         ChangedPerIter |= Interchanged;
         Changed |= Interchanged;
       }
@@ -559,6 +590,8 @@ struct LoopInterchange {
   bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
                    unsigned OuterLoopId,
                    std::vector<std::vector<char>> &DependencyMatrix,
+
+                   const std::vector<NegatedStatus> &NegStatusVec,
                    const DenseMap<const Loop *, unsigned> &CostMap) {
     Loop *OuterLoop = LoopList[OuterLoopId];
     Loop *InnerLoop = LoopList[InnerLoopId];
@@ -572,7 +605,7 @@ struct LoopInterchange {
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
     if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
-                          DependencyMatrix, CostMap, CC)) {
+                          DependencyMatrix, NegStatusVec, CostMap, CC)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
     }
@@ -1197,27 +1230,71 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
   return std::nullopt;
 }
 
+static char flipDirection(char Dir) {
+  switch (Dir) {
+  case '<':
+    return '>';
+  case '>':
+    return '<';
+  case '=':
+  case 'I':
+  case '*':
+    return Dir;
+  default:
+    llvm_unreachable("Unknown direction");
+  }
+}
+
+/// Ensure that there are no negative direction dependencies corresponding to \p
+/// Dir.
+bool NegatedStatus::isNonNegativeDir(char Dir) const {
+  assert((Original || Negated) && "Cannot restore the original direction");
+
+  // If both flag is true, it means that there is both as-is and negated
+  // direction. In this case only `=` or `I` don't have negative direction
+  // dependency.
+  if (Original && Negated)
+    return Dir == '=' || Dir == 'I';
+
+  char Restored = Negated ? flipDirection(Dir) : Dir;
+  return Restored == '=' || Restored == 'I' || Restored == '<';
+}
+
 /// Return true if we can vectorize the loop specified by \p LoopId.
-static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
+static bool canVectorize(const CharMatrix &DepMatrix,
+                         const std::vector<NegatedStatus> &NegStatusVec,
+                         unsigned LoopId) {
+  // The loop can be vectorized if there are no negative dependencies. Consider
+  // the dependency of `j` in the following example.
+  //
+  //   Positive: ... = A[i][j]       Negative: ... = A[i][j-1]
+  //             A[i][j-1] = ...               A[i][j] = ...
+  //
+  // In the right case, vectorizing the loop can change the loaded value from
+  // `A[i][j-1]`. At the moment we don't take into account the distance of the
+  // dependency and vector width.
+  // TODO: Considering the dependency distance and the vector width can give a
+  // more accurate result. For example, the following loop can be vectorized if
+  // the vector width is less than or equal to 4 x sizeof(A[0][0]).
   for (unsigned I = 0; I != DepMatrix.size(); I++) {
     char Dir = DepMatrix[I][LoopId];
-    if (Dir != 'I' && Dir != '=')
+    if (!NegStatusVec[I].isNonNegativeDir(Dir))
       return false;
   }
   return true;
 }
 
 std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
-    unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
-  // If the outer loop is not loop independent it is not profitable to move
-  // this to inner position, since doing so would not enable inner loop
-  // parallelism.
-  if (!canVectorize(DepMatrix, OuterLoopId))
+    unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix,
+    const std::vector<NegatedStatus> &NegStatusVec) {
+  // If the outer loop cannot be vectorized, it is not profitable to move this
+  // to inner position.
+  if (!canVectorize(DepMatrix, NegStatusVec, OuterLoopId))
     return false;
 
-  // If inner loop has dependence and outer loop is loop independent then it is
+  // If inner loop cannot be vectorized and outer loop can be then it is
   // profitable to interchange to enable inner loop parallelism.
-  if (!canVectorize(DepMatrix, InnerLoopId))
+  if (!canVectorize(DepMatrix, NegStatusVec, InnerLoopId))
     return true;
 
   // TODO: Estimate the cost of vectorized loop body when both the outer and the
@@ -1228,6 +1305,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix,
+    const std::vector<NegatedStatus> &NegStatusVec,
     const DenseMap<const Loop *, unsigned> &CostMap,
     std::unique_ptr<CacheCost> &CC) {
   // isProfitable() is structured to avoid endless loop interchange. If the
@@ -1249,8 +1327,8 @@ bool LoopInterchangeProfitability::isProfitable(
       shouldInterchange = isProfitablePerInstrOrderCost();
       break;
     case RuleTy::ForVectorization:
-      shouldInterchange =
-          isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
+      shouldInterchange = isProfitableForVectorization(InnerLoopId, OuterLoopId,
+                                                       DepMatrix, NegStatusVec);
       break;
     }
 
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
index b82dd5141a6b2..b83b6b37a6eda 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
@@ -64,15 +64,13 @@ exit:
 ;   for (int j = 1; j < 256; j++)
 ;     A[i][j-1] = A[i][j] + B[i][j];
 ;
-; FIXME: These loops are exchanged at this time due to the problem of
-; profitablity heuristic for vectorization.
 
-; CHECK:      --- !Passed
+; CHECK:      --- !Missed
 ; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Interchanged
+; CHECK-NEXT: Name:            InterchangeNotProfitable
 ; CHECK-NEXT: Function:        interchange_unnecesasry_for_vectorization
 ; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
+; CHECK-NEXT:   - String:          Insufficient information to calculate the cost of loop for interchange.
 define void @interchange_unnecesasry_for_vectorization() {
 entry:
   br label %for.i.header

``````````

</details>


https://github.com/llvm/llvm-project/pull/133672


More information about the llvm-branch-commits mailing list