[llvm] [LoopInterchange] Defer CacheCost calculation until needed (PR #146874)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 8 00:20:10 PDT 2025
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/146874
>From 72e3e12480b763241f6659767edb2a2fb06c77c7 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 3 Jul 2025 11:18:51 +0000
Subject: [PATCH 1/5] [LoopInterchange] Defer CacheCost calculation until
needed
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 96 +++++++++++++++----
.../delay-cachecost-calculation.ll | 77 +++++++++++++++
2 files changed, 153 insertions(+), 20 deletions(-)
create mode 100644 llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 5bb5f749d9f1a..9b3bb2053961e 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -407,6 +407,33 @@ class LoopInterchangeLegality {
SmallVector<PHINode *, 8> InnerLoopInductions;
};
+/// Manages information utilized by the profitability check for cache. The main
+/// purpose of this class is to delay the computation of CacheCost until it is
+/// actually needed.
+class LoopInterchangeCacheCostManager {
+ Loop *OutermostLoop;
+ LoopStandardAnalysisResults *AR;
+ DependenceInfo *DI;
+
+ /// CacheCost for \ref OutermostLoop. Once it is computed, it is cached. Note
+ /// that the result can be nullptr.
+ std::optional<std::unique_ptr<CacheCost>> CC;
+
+ /// Maps each loop to an index representing the optimal position within the
+ /// loop-nest, as determined by the cache cost analysis.
+ DenseMap<const Loop *, unsigned> CostMap;
+
+ void computeIfUnitinialized();
+
+public:
+ LoopInterchangeCacheCostManager(Loop *OutermostLoop,
+ LoopStandardAnalysisResults *AR,
+ DependenceInfo *DI)
+ : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
+ std::unique_ptr<CacheCost> &getCacheCost();
+ const DenseMap<const Loop *, unsigned> &getCostMap();
+};
+
/// LoopInterchangeProfitability checks if it is profitable to interchange the
/// loop.
class LoopInterchangeProfitability {
@@ -419,8 +446,7 @@ class LoopInterchangeProfitability {
bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix,
- const DenseMap<const Loop *, unsigned> &CostMap,
- std::unique_ptr<CacheCost> &CC);
+ LoopInterchangeCacheCostManager &LICCM);
private:
int getInstrOrderCost();
@@ -477,15 +503,15 @@ struct LoopInterchange {
LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
- std::unique_ptr<CacheCost> CC = nullptr;
+ LoopStandardAnalysisResults *AR = nullptr;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
- DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
+ DominatorTree *DT, LoopStandardAnalysisResults *AR,
OptimizationRemarkEmitter *ORE)
- : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
+ : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
bool run(Loop *L) {
if (L->getParentLoop())
@@ -548,11 +574,12 @@ struct LoopInterchange {
// indicates the loop should be placed as the innermost loop.
//
// For the old pass manager CacheCost would be null.
- DenseMap<const Loop *, unsigned> CostMap;
- if (CC != nullptr) {
- for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts()))
- CostMap[Cost.first] = Idx;
- }
+ // DenseMap<const Loop *, unsigned> CostMap;
+ // if (CC != nullptr) {
+ // for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts()))
+ // CostMap[Cost.first] = Idx;
+ // }
+ LoopInterchangeCacheCostManager LICCM(LoopList[0], AR, DI);
// We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fasion. We start from
// the innermost loop, move it outwards to the best possible position
@@ -561,7 +588,7 @@ struct LoopInterchange {
bool ChangedPerIter = false;
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
bool Interchanged =
- processLoop(LoopList, i, i - 1, DependencyMatrix, CostMap);
+ processLoop(LoopList, i, i - 1, DependencyMatrix, LICCM);
ChangedPerIter |= Interchanged;
Changed |= Interchanged;
}
@@ -576,7 +603,7 @@ struct LoopInterchange {
bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
unsigned OuterLoopId,
std::vector<std::vector<char>> &DependencyMatrix,
- const DenseMap<const Loop *, unsigned> &CostMap) {
+ LoopInterchangeCacheCostManager &LICCM) {
Loop *OuterLoop = LoopList[OuterLoopId];
Loop *InnerLoop = LoopList[InnerLoopId];
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
@@ -589,7 +616,7 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
- DependencyMatrix, CostMap, CC)) {
+ DependencyMatrix, LICCM)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
@@ -1122,6 +1149,36 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return true;
}
+void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
+ if (CC.has_value())
+ return;
+
+ LLVM_DEBUG(dbgs() << "Compute CacheCost.\n");
+ CC = CacheCost::getCacheCost(*OutermostLoop, *AR, *DI);
+ // Obtain the loop vector returned from loop cache analysis beforehand,
+ // and put each <Loop, index> pair into a map for constant time query
+ // later. Indices in loop vector reprsent the optimal order of the
+ // corresponding loop, e.g., given a loopnest with depth N, index 0
+ // indicates the loop should be placed as the outermost loop and index N
+ // indicates the loop should be placed as the innermost loop.
+ //
+ // For the old pass manager CacheCost would be null.
+ if (*CC != nullptr)
+ for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts()))
+ CostMap[Cost.first] = Idx;
+}
+
+std::unique_ptr<CacheCost> &LoopInterchangeCacheCostManager::getCacheCost() {
+ computeIfUnitinialized();
+ return *CC;
+}
+
+const DenseMap<const Loop *, unsigned> &
+LoopInterchangeCacheCostManager::getCostMap() {
+ computeIfUnitinialized();
+ return CostMap;
+}
+
int LoopInterchangeProfitability::getInstrOrderCost() {
unsigned GoodOrder, BadOrder;
BadOrder = GoodOrder = 0;
@@ -1247,8 +1304,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
bool LoopInterchangeProfitability::isProfitable(
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix,
- const DenseMap<const Loop *, unsigned> &CostMap,
- std::unique_ptr<CacheCost> &CC) {
+ LoopInterchangeCacheCostManager &LICCM) {
// isProfitable() is structured to avoid endless loop interchange. If the
// highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
// decide the profitability then, profitability check will stop and return the
@@ -1261,9 +1317,12 @@ bool LoopInterchangeProfitability::isProfitable(
std::optional<bool> shouldInterchange;
for (RuleTy RT : Profitabilities) {
switch (RT) {
- case RuleTy::PerLoopCacheAnalysis:
+ case RuleTy::PerLoopCacheAnalysis: {
+ std::unique_ptr<CacheCost> &CC = LICCM.getCacheCost();
+ const DenseMap<const Loop *, unsigned> &CostMap = LICCM.getCostMap();
shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
break;
+ }
case RuleTy::PerInstrOrderCost:
shouldInterchange = isProfitablePerInstrOrderCost();
break;
@@ -1841,10 +1900,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
});
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
- std::unique_ptr<CacheCost> CC =
- CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
-
- if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
+ if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
return PreservedAnalyses::all();
U.markLoopNestChanged(true);
return getLoopPassPreservedAnalyses();
diff --git a/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
new file mode 100644
index 0000000000000..69ee30eadade7
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
@@ -0,0 +1,77 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-interchange -debug -disable-output %s 2>&1 | FileCheck %s
+
+@A = global [16 x [16 x i32]] zeroinitializer
+
+; Check that the CacheCost is calculated only when required. In this case, it
+; is computed after passing the legality check.
+;
+; for (i = 0; i < 16; i++)
+; for (j = 0; j < 16; j++)
+; A[j][i] += 1;
+
+; CHECK: Loops are legal to interchange
+; CHECK: Compute CacheCost
+define void @legal_to_interchange() {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
+ %idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+ %val = load i32, ptr %idx
+ %inc = add i32 %val, 1
+ store i32 %inc, ptr %idx
+ %j.next = add i32 %j, 1
+ %j.exit = icmp eq i32 %j.next, 16
+ br i1 %j.exit, label %for.i.latch, label %for.j
+
+for.i.latch:
+ %i.next = add i32 %i, 1
+ %i.exit = icmp eq i32 %i.next, 16
+ br i1 %i.exit, label %exit, label %for.i.header
+
+exit:
+ ret void
+}
+
+; Check that the CacheCost is not calculated when not required. In this case,
+; the legality check always fails so that we do not need to compute the
+; CacheCost.
+;
+; for (i = 0; i < 16; i++)
+; for (j = 0; j < 16; j++)
+; A[j][i] = A[i][j];
+
+; CHECK-NOT: Compute CacheCost
+define void @illegal_to_interchange() {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
+ %idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %i, i32 %j
+ %idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+ %val = load i32, ptr %idx.load
+ store i32 %val, ptr %idx.store
+ %j.next = add i32 %j, 1
+ %j.exit = icmp eq i32 %j.next, 16
+ br i1 %j.exit, label %for.i.latch, label %for.j
+
+for.i.latch:
+ %i.next = add i32 %i, 1
+ %i.exit = icmp eq i32 %i.next, 16
+ br i1 %i.exit, label %exit, label %for.i.header
+
+exit:
+ ret void
+}
>From 23771cad96d74123362883c03129caaa4ba0bf03 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 3 Jul 2025 22:37:21 +0900
Subject: [PATCH 2/5] Clean up commented-out code
---
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 9b3bb2053961e..b8c871814e9ab 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -566,19 +566,6 @@ struct LoopInterchange {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- // Obtain the loop vector returned from loop cache analysis beforehand,
- // and put each <Loop, index> pair into a map for constant time query
- // later. Indices in loop vector reprsent the optimal order of the
- // corresponding loop, e.g., given a loopnest with depth N, index 0
- // indicates the loop should be placed as the outermost loop and index N
- // indicates the loop should be placed as the innermost loop.
- //
- // For the old pass manager CacheCost would be null.
- // DenseMap<const Loop *, unsigned> CostMap;
- // if (CC != nullptr) {
- // for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts()))
- // CostMap[Cost.first] = Idx;
- // }
LoopInterchangeCacheCostManager LICCM(LoopList[0], AR, DI);
// We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fasion. We start from
>From e856189e01dd04e3fa90ab3a9a04e2486925c6a7 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 7 Jul 2025 11:52:39 +0000
Subject: [PATCH 3/5] Change the types
---
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b8c871814e9ab..dd067fd9aa089 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -430,7 +430,7 @@ class LoopInterchangeCacheCostManager {
LoopStandardAnalysisResults *AR,
DependenceInfo *DI)
: OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
- std::unique_ptr<CacheCost> &getCacheCost();
+ CacheCost *getCacheCost();
const DenseMap<const Loop *, unsigned> &getCostMap();
};
@@ -451,8 +451,7 @@ class LoopInterchangeProfitability {
private:
int getInstrOrderCost();
std::optional<bool> isProfitablePerLoopCacheAnalysis(
- const DenseMap<const Loop *, unsigned> &CostMap,
- std::unique_ptr<CacheCost> &CC);
+ const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC);
std::optional<bool> isProfitablePerInstrOrderCost();
std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
unsigned OuterLoopId,
@@ -1155,9 +1154,9 @@ void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
CostMap[Cost.first] = Idx;
}
-std::unique_ptr<CacheCost> &LoopInterchangeCacheCostManager::getCacheCost() {
+CacheCost *LoopInterchangeCacheCostManager::getCacheCost() {
computeIfUnitinialized();
- return *CC;
+ return CC->get();
}
const DenseMap<const Loop *, unsigned> &
@@ -1221,8 +1220,7 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
std::optional<bool>
LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
- const DenseMap<const Loop *, unsigned> &CostMap,
- std::unique_ptr<CacheCost> &CC) {
+ const DenseMap<const Loop *, unsigned> &CostMap, CacheCost *CC) {
// This is the new cost model returned from loop cache analysis.
// A smaller index means the loop should be placed an outer loop, and vice
// versa.
@@ -1305,7 +1303,7 @@ bool LoopInterchangeProfitability::isProfitable(
for (RuleTy RT : Profitabilities) {
switch (RT) {
case RuleTy::PerLoopCacheAnalysis: {
- std::unique_ptr<CacheCost> &CC = LICCM.getCacheCost();
+ CacheCost *CC = LICCM.getCacheCost();
const DenseMap<const Loop *, unsigned> &CostMap = LICCM.getCostMap();
shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
break;
>From f95a5dd38f6bf6d4d3bf0c7982e3cfe226d03874 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 7 Jul 2025 11:55:59 +0000
Subject: [PATCH 4/5] Change the class name
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 32 ++++++++-----------
1 file changed, 14 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index dd067fd9aa089..a5008907b9014 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -410,7 +410,7 @@ class LoopInterchangeLegality {
/// Manages information utilized by the profitability check for cache. The main
/// purpose of this class is to delay the computation of CacheCost until it is
/// actually needed.
-class LoopInterchangeCacheCostManager {
+class CacheCostManager {
Loop *OutermostLoop;
LoopStandardAnalysisResults *AR;
DependenceInfo *DI;
@@ -426,9 +426,8 @@ class LoopInterchangeCacheCostManager {
void computeIfUnitinialized();
public:
- LoopInterchangeCacheCostManager(Loop *OutermostLoop,
- LoopStandardAnalysisResults *AR,
- DependenceInfo *DI)
+ CacheCostManager(Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
+ DependenceInfo *DI)
: OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
CacheCost *getCacheCost();
const DenseMap<const Loop *, unsigned> &getCostMap();
@@ -445,8 +444,7 @@ class LoopInterchangeProfitability {
/// Check if the loop interchange is profitable.
bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
unsigned InnerLoopId, unsigned OuterLoopId,
- CharMatrix &DepMatrix,
- LoopInterchangeCacheCostManager &LICCM);
+ CharMatrix &DepMatrix, CacheCostManager &CCM);
private:
int getInstrOrderCost();
@@ -565,7 +563,7 @@ struct LoopInterchange {
}
unsigned SelecLoopId = selectLoopForInterchange(LoopList);
- LoopInterchangeCacheCostManager LICCM(LoopList[0], AR, DI);
+ CacheCostManager CCM(LoopList[0], AR, DI);
// We try to achieve the globally optimal memory access for the loopnest,
// and do interchange based on a bubble-sort fasion. We start from
// the innermost loop, move it outwards to the best possible position
@@ -574,7 +572,7 @@ struct LoopInterchange {
bool ChangedPerIter = false;
for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
bool Interchanged =
- processLoop(LoopList, i, i - 1, DependencyMatrix, LICCM);
+ processLoop(LoopList, i, i - 1, DependencyMatrix, CCM);
ChangedPerIter |= Interchanged;
Changed |= Interchanged;
}
@@ -589,7 +587,7 @@ struct LoopInterchange {
bool processLoop(SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
unsigned OuterLoopId,
std::vector<std::vector<char>> &DependencyMatrix,
- LoopInterchangeCacheCostManager &LICCM) {
+ CacheCostManager &CCM) {
Loop *OuterLoop = LoopList[OuterLoopId];
Loop *InnerLoop = LoopList[InnerLoopId];
LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
@@ -602,7 +600,7 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
- DependencyMatrix, LICCM)) {
+ DependencyMatrix, CCM)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
@@ -1135,7 +1133,7 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return true;
}
-void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
+void CacheCostManager::computeIfUnitinialized() {
if (CC.has_value())
return;
@@ -1154,13 +1152,12 @@ void LoopInterchangeCacheCostManager::computeIfUnitinialized() {
CostMap[Cost.first] = Idx;
}
-CacheCost *LoopInterchangeCacheCostManager::getCacheCost() {
+CacheCost *CacheCostManager::getCacheCost() {
computeIfUnitinialized();
return CC->get();
}
-const DenseMap<const Loop *, unsigned> &
-LoopInterchangeCacheCostManager::getCostMap() {
+const DenseMap<const Loop *, unsigned> &CacheCostManager::getCostMap() {
computeIfUnitinialized();
return CostMap;
}
@@ -1288,8 +1285,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
bool LoopInterchangeProfitability::isProfitable(
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
- unsigned OuterLoopId, CharMatrix &DepMatrix,
- LoopInterchangeCacheCostManager &LICCM) {
+ unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
// isProfitable() is structured to avoid endless loop interchange. If the
// highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
// decide the profitability then, profitability check will stop and return the
@@ -1303,8 +1299,8 @@ bool LoopInterchangeProfitability::isProfitable(
for (RuleTy RT : Profitabilities) {
switch (RT) {
case RuleTy::PerLoopCacheAnalysis: {
- CacheCost *CC = LICCM.getCacheCost();
- const DenseMap<const Loop *, unsigned> &CostMap = LICCM.getCostMap();
+ CacheCost *CC = CCM.getCacheCost();
+ const DenseMap<const Loop *, unsigned> &CostMap = CCM.getCostMap();
shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
break;
}
>From a3549b00e6742744b95f9325000c9b89a8356e2f Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 8 Jul 2025 07:19:03 +0000
Subject: [PATCH 5/5] Fix test
---
.../LoopInterchange/delay-cachecost-calculation.ll | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
index 69ee30eadade7..d80b4420c034c 100644
--- a/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
+++ b/llvm/test/Transforms/LoopInterchange/delay-cachecost-calculation.ll
@@ -23,7 +23,7 @@ for.i.header:
for.j:
%j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
- %idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+ %idx = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %j, i32 %i
%val = load i32, ptr %idx
%inc = add i32 %val, 1
store i32 %inc, ptr %idx
@@ -59,8 +59,8 @@ for.i.header:
for.j:
%j = phi i32 [ 0, %for.i.header ], [ %j.next, %for.j ]
- %idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %i, i32 %j
- %idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 %j, i32 %i
+ %idx.load = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %i, i32 %j
+ %idx.store = getelementptr inbounds [16 x [16 x i32]], ptr @A, i32 0, i32 %j, i32 %i
%val = load i32, ptr %idx.load
store i32 %val, ptr %idx.store
%j.next = add i32 %j, 1
More information about the llvm-commits
mailing list