[llvm] [LoopInterchange] Defer CacheCost calculation until needed (PR #146874)

Tue Jul 8 00:20:10 PDT 2025

https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/146874

>From 72e3e12480b763241f6659767edb2a2fb06c77c7 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 3 Jul 2025 11:18:51 +0000
Subject: [PATCH 1/5] [LoopInterchange] Defer CacheCost calculation until
 needed

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 96 +++++++++++++++----
 .../delay-cachecost-calculation.ll            | 77 +++++++++++++++
 2 files changed, 153 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 5bb5f749d9f1a..9b3bb2053961e 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -407,6 +407,33 @@ class LoopInterchangeLegality {
   SmallVector<PHINode *, 8> InnerLoopInductions;
 };
 
+/// Manages information utilized by the profitability check for cache. The main
+/// purpose of this class is to delay the computation of CacheCost until it is
+/// actually needed.
+class LoopInterchangeCacheCostManager {
+  Loop *OutermostLoop;
+  LoopStandardAnalysisResults *AR;
+  DependenceInfo *DI;
+
+  /// CacheCost for \ref OutermostLoop. Once it is computed, it is cached. Note
+  /// that the result can be nullptr.
+  std::optional<std::unique_ptr<CacheCost>> CC;
+
+  /// Maps each loop to an index representing the optimal position within the
+  /// loop-nest, as determined by the cache cost analysis.
+  DenseMap<const Loop *, unsigned> CostMap;
+
+  void computeIfUnitinialized();
+
+public:
+  LoopInterchangeCacheCostManager(Loop *OutermostLoop,
+                                  LoopStandardAnalysisResults *AR,
+                                  DependenceInfo *DI)
+      : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
+  std::unique_ptr<CacheCost> &getCacheCost();
+  const DenseMap<const Loop *, unsigned> &getCostMap();
+};
+
 /// LoopInterchangeProfitability checks if it is profitable to interchange the
 /// loop.
 class LoopInterchangeProfitability {
@@ -419,8 +446,7 @@ class LoopInterchangeProfitability {
   bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
                     unsigned InnerLoopId, unsigned OuterLoopId,
                     CharMatrix &DepMatrix,
-                    const DenseMap<const Loop *, unsigned> &CostMap,
-                    std::unique_ptr<CacheCost> &CC);
+                    LoopInterchangeCacheCostManager &LICCM);
 
 private:
   int getInstrOrderCost();
@@ -477,15 +503,15 @@ struct LoopInterchange {
   LoopInfo *LI = nullptr;
   DependenceInfo *DI = nullptr;
   DominatorTree *DT = nullptr;
-  std::unique_ptr<CacheCost> CC = nullptr;
+  LoopStandardAnalysisResults *AR = nullptr;
 
   /// Interface to emit optimization remarks.
   OptimizationRemarkEmitter *ORE;
 
   LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
-                  DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
+                  DominatorTree *DT, LoopStandardAnalysisResults *AR,
                   OptimizationRemarkEmitter *ORE)
-      : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
+      : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
 
   bool run(Loop *L) {
     if (L->getParentLoop())
@@ -548,11 +574,12 @@ struct LoopInterchange {
     // indicates the loop should be placed as the innermost loop.
     //
     // For the old pass manager CacheCost would be null.
-    DenseMap<const Loop *, unsigned> CostMap;
-    if (CC != nullptr) {
-      for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts()))
-        CostMap[Cost.first] = Idx;
-    }
+    // DenseMap<const Loop *, unsigned> CostMap;
+    // if (CC != nullptr) {
+    //   for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts()))
+    //     CostMap[Cost.first] = Idx;
+    // }
+    LoopInterchangeCacheCostManager LICCM(LoopList[0], AR, DI);
     // We try to achieve the globally optimal memory access for the loopnest,
     // and do interchange based on a bubble-sort fasion. We start from
     // the innermost loop, move it outwards to the best possible position
@@ -561,7 +588,7 @@ struct LoopInterchange {
       bool ChangedPerIter = false;
       for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
         bool Interchanged =
-            processLoop(LoopList, i, i - 1, DependencyMatrix, CostMap);
+            processLoop(LoopList, i, i - 1, DependencyMatrix, LICCM);
         ChangedPerIter |= Interchanged;
         Changed |= Interchanged;
       }
@@ -576,7 +603,7 @@ struct LoopInterchange {
   bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
                    unsigned OuterLoopId,
                    std::vector<std::vector<char>> &DependencyMatrix,
-                   const DenseMap<const Loop *, unsigned> &CostMap) {
+                   LoopInterchangeCacheCostManager &LICCM) {
     Loop *OuterLoop = LoopList[OuterLoopId];
     Loop *InnerLoop = LoopList[InnerLoopId];
     LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
@@ -589,7 +616,7 @@ struct LoopInterchange {
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
     if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
-                          DependencyMatrix, CostMap, CC)) {
+                          DependencyMatrix, LICCM)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
     }
@@ -1122,6 +1149,36 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
   return true;
 }
 
+void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
+  if (CC.has_value())
+    return;
+
+  LLVM_DEBUG(dbgs() << "Compute CacheCost.\n");
+  CC = CacheCost::getCacheCost(*OutermostLoop, *AR, *DI);
+  // Obtain the loop vector returned from loop cache analysis beforehand,
+  // and put each <Loop, index> pair into a map for constant time query
+  // later. Indices in loop vector represent the optimal order of the
+  // corresponding loop, e.g., given a loopnest with depth N, index 0
+  // indicates the loop should be placed as the outermost loop and index N-1
+  // indicates the loop should be placed as the innermost loop.
+  //
+  // For the old pass manager CacheCost would be null.
+  if (*CC != nullptr)
+    for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))
+      CostMap[Cost.first] = Idx;
+}
+
+std::unique_ptr<CacheCost> &LoopInterchangeCacheCostManager::getCacheCost() {
+  computeIfUnitinialized();
+  return *CC;
+}
+
+const DenseMap<const Loop *, unsigned> &
+LoopInterchangeCacheCostManager::getCostMap() {
+  computeIfUnitinialized();
+  return CostMap;
+}
+
 int LoopInterchangeProfitability::getInstrOrderCost() {
   unsigned GoodOrder, BadOrder;
   BadOrder = GoodOrder = 0;
@@ -1247,8 +1304,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
     unsigned OuterLoopId, CharMatrix &DepMatrix,
-    const DenseMap<const Loop *, unsigned> &CostMap,
-    std::unique_ptr<CacheCost> &CC) {
+    LoopInterchangeCacheCostManager &LICCM) {
   // isProfitable() is structured to avoid endless loop interchange. If the
   // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
   // decide the profitability then, profitability check will stop and return the
@@ -1261,9 +1317,12 @@ bool LoopInterchangeProfitability::isProfitable(
   std::optional<bool> shouldInterchange;
   for (RuleTy RT : Profitabilities) {
     switch (RT) {
-    case RuleTy::PerLoopCacheAnalysis:
+    case RuleTy::PerLoopCacheAnalysis: {
+      std::unique_ptr<CacheCost> &CC = LICCM.getCacheCost();
+      const DenseMap<const Loop *, unsigned> &CostMap = LICCM.getCostMap();
       shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
       break;
+    }
     case RuleTy::PerInstrOrderCost:
       shouldInterchange = isProfitablePerInstrOrderCost();
       break;
@@ -1841,10 +1900,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
   });
 
   DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
-  std::unique_ptr<CacheCost> CC =
-      CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
-
-  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
+  if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);
   return getLoopPassPreservedAnalyses();
diff --git a/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
new file mode 100644
index 0000000000000..69ee30eadade7
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
@@ -0,0 +1,77 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-interchange -debug -disable-output %s 2>&1 | FileCheck %s
+
+@A = global [16 x [16 x i32]] zeroinitializer
+
+; Check that the CacheCost is calculated only when required. In this case, it
+; is computed after passing the legality check.
+;
+; for (i = 0; i < 16; i++)
+;   for (j = 0; j < 16; j++)
+;     A[j][i] += 1;
+
+; CHECK: Loops are legal to interchange
+; CHECK: Compute CacheCost
+define void @legal_to_interchange() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ]
+  br label %for.j
+
+for.j:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
+  %idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+  %val = load i32, ptr %idx
+  %inc = add i32 %val, 1
+  store i32 %inc, ptr %idx
+  %j.next = add i32 %j, 1
+  %j.exit = icmp eq i32 %j.next, 16
+  br i1 %j.exit, label %for.i.latch, label %for.j
+
+for.i.latch:
+  %i.next = add i32 %i, 1
+  %i.exit = icmp eq i32 %i.next, 16
+  br i1 %i.exit, label %exit, label %for.i.header
+
+exit:
+  ret void
+}
+
+; Check that the CacheCost is not calculated when not required. In this case,
+; the legality check always fails so that we do not need to compute the
+; CacheCost.
+;
+; for (i = 0; i < 16; i++)
+;   for (j = 0; j < 16; j++)
+;     A[j][i] = A[i][j];
+
+; CHECK-NOT: Compute CacheCost
+define void @illegal_to_interchange() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ]
+  br label %for.j
+
+for.j:
+  %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
+  %idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %i, i32 %j
+  %idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+  %val = load i32, ptr %idx.load
+  store i32 %val, ptr %idx.store
+  %j.next = add i32 %j, 1
+  %j.exit = icmp eq i32 %j.next, 16
+  br i1 %j.exit, label %for.i.latch, label %for.j
+
+for.i.latch:
+  %i.next = add i32 %i, 1
+  %i.exit = icmp eq i32 %i.next, 16
+  br i1 %i.exit, label %exit, label %for.i.header
+
+exit:
+  ret void
+}

>From 23771cad96d74123362883c03129caaa4ba0bf03 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 3 Jul 2025 22:37:21 +0900
Subject: [PATCH 2/5] Clean up commented-out code

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 9b3bb2053961e..b8c871814e9ab 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -566,19 +566,6 @@ struct LoopInterchange {
     }
 
     unsigned SelecLoopId = selectLoopForInterchange(LoopList);
-    // Obtain the loop vector returned from loop cache analysis beforehand,
-    // and put each <Loop, index> pair into a map for constant time query
-    // later. Indices in loop vector reprsent the optimal order of the
-    // corresponding loop, e.g., given a loopnest with depth N, index 0
-    // indicates the loop should be placed as the outermost loop and index N
-    // indicates the loop should be placed as the innermost loop.
-    //
-    // For the old pass manager CacheCost would be null.
-    // DenseMap<const Loop *, unsigned> CostMap;
-    // if (CC != nullptr) {
-    //   for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts()))
-    //     CostMap[Cost.first] = Idx;
-    // }
     LoopInterchangeCacheCostManager LICCM(LoopList[0], AR, DI);
     // We try to achieve the globally optimal memory access for the loopnest,
     // and do interchange based on a bubble-sort fasion. We start from

>From e856189e01dd04e3fa90ab3a9a04e2486925c6a7 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 7 Jul 2025 11:52:39 +0000
Subject: [PATCH 3/5] Change the types

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b8c871814e9ab..dd067fd9aa089 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -430,7 +430,7 @@ class LoopInterchangeCacheCostManager {
                                   LoopStandardAnalysisResults *AR,
                                   DependenceInfo *DI)
       : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
-  std::unique_ptr<CacheCost> &getCacheCost();
+  CacheCost *getCacheCost();
   const DenseMap<const Loop *, unsigned> &getCostMap();
 };
 
@@ -451,8 +451,7 @@ class LoopInterchangeProfitability {
 private:
   int getInstrOrderCost();
   std::optional<bool> isProfitablePerLoopCacheAnalysis(
-      const DenseMap<const Loop *, unsigned> &CostMap,
-      std::unique_ptr<CacheCost> &CC);
+      const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
   std::optional<bool> isProfitablePerInstrOrderCost();
   std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
                                                    unsigned OuterLoopId,
@@ -1155,9 +1154,9 @@ void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
       CostMap[Cost.first] = Idx;
 }
 
-std::unique_ptr<CacheCost> &LoopInterchangeCacheCostManager::getCacheCost() {
+CacheCost *LoopInterchangeCacheCostManager::getCacheCost() {
   computeIfUnitinialized();
-  return *CC;
+  return CC->get();
 }
 
 const DenseMap<const Loop *, unsigned> &
@@ -1221,8 +1220,7 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
 
 std::optional<bool>
 LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
-    const DenseMap<const Loop *, unsigned> &CostMap,
-    std::unique_ptr<CacheCost> &CC) {
+    const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
   // This is the new cost model returned from loop cache analysis.
   // A smaller index means the loop should be placed an outer loop, and vice
   // versa.
@@ -1305,7 +1303,7 @@ bool LoopInterchangeProfitability::isProfitable(
   for (RuleTy RT : Profitabilities) {
     switch (RT) {
     case RuleTy::PerLoopCacheAnalysis: {
-      std::unique_ptr<CacheCost> &CC = LICCM.getCacheCost();
+      CacheCost *CC = LICCM.getCacheCost();
       const DenseMap<const Loop *, unsigned> &CostMap = LICCM.getCostMap();
       shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
       break;

>From f95a5dd38f6bf6d4d3bf0c7982e3cfe226d03874 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 7 Jul 2025 11:55:59 +0000
Subject: [PATCH 4/5] Change the class name

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 32 ++++++++-----------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index dd067fd9aa089..a5008907b9014 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -410,7 +410,7 @@ class LoopInterchangeLegality {
 /// Manages information utilized by the profitability check for cache. The main
 /// purpose of this class is to delay the computation of CacheCost until it is
 /// actually needed.
-class LoopInterchangeCacheCostManager {
+class CacheCostManager {
   Loop *OutermostLoop;
   LoopStandardAnalysisResults *AR;
   DependenceInfo *DI;
@@ -426,9 +426,8 @@ class LoopInterchangeCacheCostManager {
   void computeIfUnitinialized();
 
 public:
-  LoopInterchangeCacheCostManager(Loop *OutermostLoop,
-                                  LoopStandardAnalysisResults *AR,
-                                  DependenceInfo *DI)
+  CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
+                   DependenceInfo *DI)
       : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
   CacheCost *getCacheCost();
   const DenseMap<const Loop *, unsigned> &getCostMap();
@@ -445,8 +444,7 @@ class LoopInterchangeProfitability {
   /// Check if the loop interchange is profitable.
   bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
                     unsigned InnerLoopId, unsigned OuterLoopId,
-                    CharMatrix &DepMatrix,
-                    LoopInterchangeCacheCostManager &LICCM);
+                    CharMatrix &DepMatrix, CacheCostManager &CCM);
 
 private:
   int getInstrOrderCost();
@@ -565,7 +563,7 @@ struct LoopInterchange {
     }
 
     unsigned SelecLoopId = selectLoopForInterchange(LoopList);
-    LoopInterchangeCacheCostManager LICCM(LoopList[0], AR, DI);
+    CacheCostManager CCM(LoopList[0], AR, DI);
     // We try to achieve the globally optimal memory access for the loopnest,
     // and do interchange based on a bubble-sort fasion. We start from
     // the innermost loop, move it outwards to the best possible position
@@ -574,7 +572,7 @@ struct LoopInterchange {
       bool ChangedPerIter = false;
       for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
         bool Interchanged =
-            processLoop(LoopList, i, i - 1, DependencyMatrix, LICCM);
+            processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
         ChangedPerIter |= Interchanged;
         Changed |= Interchanged;
       }
@@ -589,7 +587,7 @@ struct LoopInterchange {
   bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
                    unsigned OuterLoopId,
                    std::vector<std::vector<char>> &DependencyMatrix,
-                   LoopInterchangeCacheCostManager &LICCM) {
+                   CacheCostManager &CCM) {
     Loop *OuterLoop = LoopList[OuterLoopId];
     Loop *InnerLoop = LoopList[InnerLoopId];
     LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
@@ -602,7 +600,7 @@ struct LoopInterchange {
     LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
     LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
     if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
-                          DependencyMatrix, LICCM)) {
+                          DependencyMatrix, CCM)) {
       LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
       return false;
     }
@@ -1135,7 +1133,7 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
   return true;
 }
 
-void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
+void CacheCostManager::computeIfUnitinialized() {
   if (CC.has_value())
     return;
 
@@ -1154,13 +1152,12 @@ void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
       CostMap[Cost.first] = Idx;
 }
 
-CacheCost *LoopInterchangeCacheCostManager::getCacheCost() {
+CacheCost *CacheCostManager::getCacheCost() {
   computeIfUnitinialized();
   return CC->get();
 }
 
-const DenseMap<const Loop *, unsigned> &
-LoopInterchangeCacheCostManager::getCostMap() {
+const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
   computeIfUnitinialized();
   return CostMap;
 }
@@ -1288,8 +1285,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
 
 bool LoopInterchangeProfitability::isProfitable(
     const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
-    unsigned OuterLoopId, CharMatrix &DepMatrix,
-    LoopInterchangeCacheCostManager &LICCM) {
+    unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
   // isProfitable() is structured to avoid endless loop interchange. If the
   // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
   // decide the profitability then, profitability check will stop and return the
@@ -1303,8 +1299,8 @@ bool LoopInterchangeProfitability::isProfitable(
   for (RuleTy RT : Profitabilities) {
     switch (RT) {
     case RuleTy::PerLoopCacheAnalysis: {
-      CacheCost *CC = LICCM.getCacheCost();
-      const DenseMap<const Loop *, unsigned> &CostMap = LICCM.getCostMap();
+      CacheCost *CC = CCM.getCacheCost();
+      const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
       shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
       break;
     }

>From a3549b00e6742744b95f9325000c9b89a8356e2f Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 8 Jul 2025 07:19:03 +0000
Subject: [PATCH 5/5] Fix test

---
 .../LoopInterchange/delay-cachecost-calculation.ll          | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
index 69ee30eadade7..d80b4420c034c 100644
--- a/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
+++ b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
@@ -23,7 +23,7 @@ for.i.header:
 
 for.j:
   %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
-  %idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+  %idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %j, i32 %i
   %val = load i32, ptr %idx
   %inc = add i32 %val, 1
   store i32 %inc, ptr %idx
@@ -59,8 +59,8 @@ for.i.header:
 
 for.j:
   %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
-  %idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %i, i32 %j
-  %idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+  %idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %i, i32 %j
+  %idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %j, i32 %i
   %val = load i32, ptr %idx.load
   store i32 %val, ptr %idx.store
   %j.next = add i32 %j, 1