[llvm] [LoopInterchange] Add an option to control the cost heuristics applied (PR #133664)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 30 18:14:31 PDT 2025


https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/133664

LoopInterchange has several heuristic functions to determine if exchanging two loops is profitable or not. Whether or not to use each heuristic and the order in which to use them were fixed, but #125830 allows them to be changed internally at will. This patch adds a new option to control them via the compiler option.

The previous patch also added an option to prioritize the vectorization heuristic. This patch also removes it to avoid conflicts between it and the newly introduced one, e.g., both
`-loop-interchange-prioritize-vectorization=1` and `-loop-interchange-profitabilities='cache,vectorization'` are specified.

>From 91448733582cacc87cc8105cdf36154ee29e0a99 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 26 Mar 2025 13:24:50 +0000
Subject: [PATCH] [LoopInterchange] Add an option to control the cost
 heuristics applied

LoopInterchange has several heuristic functions to determine if
exchanging two loops is profitable or not. Whether or not to use each
heuristic and the order in which to use them were fixed, but #125830
allows them to be changed internally at will. This patch adds a new
option to control them via the compiler option.

The previous patch also added an option to prioritize the vectorization
heuristic. This patch also removes it to avoid conflicts between it and
the newly introduced one, e.g., both
`-loop-interchange-prioritize-vectorization=1` and
`-loop-interchange-profitabilities='cache,vectorization'` are specified.
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 56 +++++++++++--------
 .../profitability-vectorization.ll            |  2 +-
 2 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 4366418b2379d..e777f950a7c5a 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
@@ -72,6 +73,13 @@ using LoopVector = SmallVector<Loop *, 8>;
 // TODO: Check if we can use a sparse matrix here.
 using CharMatrix = std::vector<std::vector<char>>;
 
+/// Types of rules used in profitability check.
+enum class RuleTy {
+  PerLoopCacheAnalysis,
+  PerInstrOrderCost,
+  ForVectorization,
+};
+
 } // end anonymous namespace
 
 // Minimum loop depth supported.
@@ -84,12 +92,31 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
     "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
     cl::desc("Maximum depth of loop nest considered for the transform"));
 
-static cl::opt<bool> PrioritizeVectorization(
-    "loop-interchange-prioritize-vectorization", cl::init(false), cl::Hidden,
-    cl::desc("Prioritize increasing vectorization opportunity over cache cost "
-             "when determining profitability"));
+// We prefer cache cost to vectorization by default.
+static cl::list<RuleTy> Profitabilities(
+    "loop-interchange-profitabilities", cl::ZeroOrMore,
+    cl::MiscFlags::CommaSeparated, cl::Hidden,
+    cl::desc("List of profitability heuristics to be used. They are applied in "
+             "the given order"),
+    cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
+                           RuleTy::PerInstrOrderCost,
+                           RuleTy::ForVectorization}),
+    cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
+                          "Prioritize loop cache cost"),
+               clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
+                          "Prioritize the IVs order of each instruction"),
+               clEnumValN(RuleTy::ForVectorization, "vectorize",
+                          "Prioritize vectorization")));
 
 #ifndef NDEBUG
+static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
+  SmallSet<RuleTy, 4> Set;
+  for (RuleTy Rule : Rules)
+    if (!Set.insert(Rule).second)
+      return false;
+  return true;
+}
+
 static void printDepMatrix(CharMatrix &DepMatrix) {
   for (auto &Row : DepMatrix) {
     for (auto D : Row)
@@ -1204,26 +1231,9 @@ bool LoopInterchangeProfitability::isProfitable(
   // second highest priority rule (isProfitablePerInstrOrderCost by default).
   // Likewise, if it failed to analysis the profitability then only, the last
   // rule (isProfitableForVectorization by default) will decide.
-  enum class RuleTy {
-    PerLoopCacheAnalysis,
-    PerInstrOrderCost,
-    ForVectorization,
-  };
-
-  // We prefer cache cost to vectorization by default.
-  RuleTy RuleOrder[3] = {RuleTy::PerLoopCacheAnalysis,
-                         RuleTy::PerInstrOrderCost, RuleTy::ForVectorization};
-
-  // If we prefer vectorization to cache cost, change the order of application
-  // of each rule.
-  if (PrioritizeVectorization) {
-    RuleOrder[0] = RuleTy::ForVectorization;
-    RuleOrder[1] = RuleTy::PerLoopCacheAnalysis;
-    RuleOrder[2] = RuleTy::PerInstrOrderCost;
-  }
-
+  assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
   std::optional<bool> shouldInterchange;
-  for (RuleTy RT : RuleOrder) {
+  for (RuleTy RT : Profitabilities) {
     switch (RT) {
     case RuleTy::PerLoopCacheAnalysis:
       shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index 0018aa0308f28..85be48cb9a710 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -3,7 +3,7 @@
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
-; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-prioritize-vectorization=1
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
 
 @A = dso_local global [256 x [256 x float]] zeroinitializer



More information about the llvm-commits mailing list