[llvm] [LoopInterchange] Ignore the cost-model, force interchange if legal (PR #148858)

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 17 06:54:11 PDT 2025


https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/148858

>From db9c37e08f5fbb4458ea87f539eae41c275a8a2e Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Tue, 15 Jul 2025 04:04:01 -0700
Subject: [PATCH 1/6] [LoopInterchange] Ignore the cost-model, force
 interchange if legal

This is useful for testing purposes, to get more test coverage.
---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp             | 7 ++++++-
 .../LoopInterchange/profitability-vectorization.ll         | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a5008907b9014..b62c25d52861b 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -66,6 +66,10 @@ static cl::opt<unsigned int> MaxMemInstrCount(
         "in the dependency matrix. Higher value may lead to more interchanges "
         "at the cost of compile-time"));
 
+static cl::opt<bool> ForceLoopInterchange(
+    "loop-interchange-force", cl::init(false), cl::Hidden,
+    cl::desc("Ignore the cost model, and force interchange if it is legal"));
+
 namespace {
 
 using LoopVector = SmallVector<Loop *, 8>;
@@ -599,7 +603,8 @@ struct LoopInterchange {
     }
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
-    if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+    if (!ForceLoopInterchange &&
+        !LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
                           DependencyMatrix, CCM)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index 16952a66aa78e..59196bbbb9c00 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -2,6 +2,10 @@
 ; RUN:     -pass-remarks-output=%t -disable-output
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-force=true
+; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
+
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
 ; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s

>From fe979eaa158f7298a4dfa28d012b72c70c919ddc Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Wed, 16 Jul 2025 07:55:55 -0700
Subject: [PATCH 2/6] Add an ignore value to the existing profitability option.

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 17 ++++++-
 .../LoopInterchange/force-interchange.ll      | 50 +++++++++++++++++++
 .../profitability-vectorization.ll            |  2 +-
 3 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/force-interchange.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b62c25d52861b..4c16570f52ec9 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -82,6 +82,7 @@ enum class RuleTy {
   PerLoopCacheAnalysis,
   PerInstrOrderCost,
   ForVectorization,
+  Ignore
 };
 
 } // end anonymous namespace
@@ -110,7 +111,10 @@ static cl::list<RuleTy> Profitabilities(
                clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
                           "Prioritize the IVs order of each instruction"),
                clEnumValN(RuleTy::ForVectorization, "vectorize",
-                          "Prioritize vectorization")));
+                          "Prioritize vectorization"),
+               clEnumValN(RuleTy::Ignore, "ignore",
+                          "Ignore profitability, force interchange (does not "
+			  "work with other options)")));
 
 #ifndef NDEBUG
 static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
@@ -1291,6 +1295,12 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
+
+  // Return true if interchange is forced.
+  if (Profitabilities.size() == 1 &&
+      Profitabilities[0] == RuleTy::Ignore)
+    return true;
+
   // isProfitable() is structured to avoid endless loop interchange. If the
   // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
   // decide the profitability then, profitability check will stop and return the
@@ -1316,6 +1326,11 @@ bool LoopInterchangeProfitability::isProfitable(
       shouldInterchange =
           isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
       break;
+    case RuleTy::Ignore:
+      // TODO? We ignore the force option when it appears in a list, i.e. it
+      // should occur as the only option to be effective, as mentioned in the
+      // help.
+      break;
     }
 
     // If this rule could determine the profitability, don't call subsequent
diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
new file mode 100644
index 0000000000000..14af2ba5f1d3f
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -passes=loop-interchange -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore -S
+; RUN: FileCheck --input-file=%t %s
+
+; There should be no reason to interchange this, unless it is forced.
+;
+;     for (int i = 0; i<N; i++)
+;       for (int j = 0; j<N; j++)
+;         A[i][j] = 42;
+;
+; CHECK:      --- !Analysis
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Dependence
+; CHECK-NEXT: Function:        f
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
+; CHECK-NEXT: ...
+; CHECK-NEXT: --- !Passed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            Interchanged
+; CHECK-NEXT: Function:        f
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:          Loop interchanged with enclosing loop.
+; CHECK-NEXT: ...
+
+@A = dso_local local_unnamed_addr global [1024 x [1024 x i32]] zeroinitializer, align 4
+
+define dso_local void @f() local_unnamed_addr #0 {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %indvars.iv17 = phi i64 [ 0, %entry ], [ %indvars.iv.next18, %for.cond.cleanup3 ]
+  br label %for.body4
+
+for.cond.cleanup:
+  ret void
+
+for.cond.cleanup3:
+  %indvars.iv.next18 = add nuw nsw i64 %indvars.iv17, 1
+  %exitcond20.not = icmp eq i64 %indvars.iv.next18, 1024
+  br i1 %exitcond20.not, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.body4:
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %arrayidx6 = getelementptr inbounds nuw [1024 x [1024 x i32]], ptr @A, i64 0, i64 %indvars.iv17, i64 %indvars.iv
+  store i32 42, ptr %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+}
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index 59196bbbb9c00..d86c3d959e6ac 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -3,7 +3,7 @@
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
-; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-force=true
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore 
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
 
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \

>From 5bb2158865bea3b144d7c0f110a73a51ea29f0a4 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 06:06:24 -0700
Subject: [PATCH 3/6] Remove the old option, we don't need that anymore, left
 it accidentally there.

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 4c16570f52ec9..3297272c2904f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -66,10 +66,6 @@ static cl::opt<unsigned int> MaxMemInstrCount(
         "in the dependency matrix. Higher value may lead to more interchanges "
         "at the cost of compile-time"));
 
-static cl::opt<bool> ForceLoopInterchange(
-    "loop-interchange-force", cl::init(false), cl::Hidden,
-    cl::desc("Ignore the cost model, and force interchange if it is legal"));
-
 namespace {
 
 using LoopVector = SmallVector<Loop *, 8>;
@@ -607,8 +603,7 @@ struct LoopInterchange {
     }
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
-    if (!ForceLoopInterchange &&
-        !LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
+    if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
                           DependencyMatrix, CCM)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;

>From 1e8e6673d1ef0a04e494cdea22e8c44b9aff8b90 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 06:10:43 -0700
Subject: [PATCH 4/6] Fix formatting.

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 3297272c2904f..62c33446fb5ff 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -110,7 +110,7 @@ static cl::list<RuleTy> Profitabilities(
                           "Prioritize vectorization"),
                clEnumValN(RuleTy::Ignore, "ignore",
                           "Ignore profitability, force interchange (does not "
-			  "work with other options)")));
+                          "work with other options)")));
 
 #ifndef NDEBUG
 static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
@@ -1292,8 +1292,7 @@ bool LoopInterchangeProfitability::isProfitable(
     unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
 
   // Return true if interchange is forced.
-  if (Profitabilities.size() == 1 &&
-      Profitabilities[0] == RuleTy::Ignore)
+  if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
     return true;
 
   // isProfitable() is structured to avoid endless loop interchange. If the

>From 6a3b3f03623113e7b646f438fd506ebf26ac512f Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 14:33:13 +0100
Subject: [PATCH 5/6] Update
 llvm/test/Transforms/LoopInterchange/force-interchange.ll

Co-authored-by: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
---
 llvm/test/Transforms/LoopInterchange/force-interchange.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
index 14af2ba5f1d3f..7504c96a9e7fc 100644
--- a/llvm/test/Transforms/LoopInterchange/force-interchange.ll
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -3,8 +3,8 @@
 
 ; There should be no reason to interchange this, unless it is forced.
 ;
-;     for (int i = 0; i<N; i++)
-;       for (int j = 0; j<N; j++)
+;     for (int i = 0; i<1024; i++)
+;       for (int j = 0; j<1024; j++)
 ;         A[i][j] = 42;
 ;
 ; CHECK:      --- !Analysis

>From 82a3043df9371d09e87b10c699eda696ad203ed3 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Thu, 17 Jul 2025 06:53:30 -0700
Subject: [PATCH 6/6] Addressed review comments

---
 .../test/Transforms/LoopInterchange/force-interchange.ll | 9 +--------
 .../LoopInterchange/profitability-vectorization.ll       | 4 ----
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/force-interchange.ll b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
index 7504c96a9e7fc..a0f2064516675 100644
--- a/llvm/test/Transforms/LoopInterchange/force-interchange.ll
+++ b/llvm/test/Transforms/LoopInterchange/force-interchange.ll
@@ -7,14 +7,7 @@
 ;       for (int j = 0; j<1024; j++)
 ;         A[i][j] = 42;
 ;
-; CHECK:      --- !Analysis
-; CHECK-NEXT: Pass:            loop-interchange
-; CHECK-NEXT: Name:            Dependence
-; CHECK-NEXT: Function:        f
-; CHECK-NEXT: Args:
-; CHECK-NEXT:   - String:          Computed dependence info, invoking the transform.
-; CHECK-NEXT: ...
-; CHECK-NEXT: --- !Passed
+; CHECK:      --- !Passed
 ; CHECK-NEXT: Pass:            loop-interchange
 ; CHECK-NEXT: Name:            Interchanged
 ; CHECK-NEXT: Function:        f
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index d86c3d959e6ac..16952a66aa78e 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -2,10 +2,6 @@
 ; RUN:     -pass-remarks-output=%t -disable-output
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
-; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
-; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=ignore 
-; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
-
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
 ; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s



More information about the llvm-commits mailing list