[llvm] LCA: migrate from DA to LAA; fix cost computation (PR #107691)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 7 04:43:32 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
LoopCacheAnalysis currently uses DependenceAnalysis, which hasn't been worked on for years. Migrate it to use the newer and actively-maintaiend LoopAccessAnalysis, fixing a long-standing cost-computation issue, due to an underlying bug in DependenceAnalysis.
---
Full diff: https://github.com/llvm/llvm-project/pull/107691.diff
6 Files Affected:
- (modified) llvm/include/llvm/Analysis/LoopAccessAnalysis.h (+2)
- (modified) llvm/include/llvm/Analysis/LoopCacheAnalysis.h (+8-4)
- (modified) llvm/lib/Analysis/LoopCacheAnalysis.cpp (+36-49)
- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+3-2)
- (modified) llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll (+1-5)
- (modified) llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll (+2-6)
``````````diff
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 73d9c26ed6b1b7..41f027811c8802 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -270,6 +270,8 @@ class MemoryDepChecker {
return PointerBounds;
}
+ uint64_t getMinDepDist() const { return MinDepDistBytes; }
+
private:
/// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
/// applies dynamic knowledge to simplify SCEV expressions and convert them
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index 4fd2485e39d6db..edd13a413ee9ea 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -14,6 +14,7 @@
#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
#include <optional>
@@ -82,7 +83,8 @@ class IndexedReference {
/// MaxDistance and std::nullopt if unsure.
std::optional<bool> hasTemporalReuse(const IndexedReference &Other,
unsigned MaxDistance, const Loop &L,
- DependenceInfo &DI, AAResults &AA) const;
+ const LoopAccessInfo &LAI,
+ AAResults &AA) const;
/// Compute the cost of the reference w.r.t. the given loop \p L when it is
/// considered in the innermost position in the loop nest.
@@ -199,7 +201,8 @@ class CacheCost {
/// between array elements accessed in a loop so that the elements are
/// classified to have temporal reuse.
CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
- TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI,
+ TargetTransformInfo &TTI, AAResults &AA,
+ LoopAccessInfoManager &LAIs,
std::optional<unsigned> TRT = std::nullopt);
/// Create a CacheCost for the loop nest rooted by \p Root.
@@ -207,7 +210,8 @@ class CacheCost {
/// between array elements accessed in a loop so that the elements are
/// classified to have temporal reuse.
static std::unique_ptr<CacheCost>
- getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
+ getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
+ LoopAccessInfoManager &LAIs,
std::optional<unsigned> TRT = std::nullopt);
/// Return the estimated cost of loop \p L if the given loop is part of the
@@ -276,7 +280,7 @@ class CacheCost {
ScalarEvolution &SE;
TargetTransformInfo &TTI;
AAResults &AA;
- DependenceInfo &DI;
+ LoopAccessInfoManager &LAIs;
};
raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 7ca9f15ad5fca0..ff83626eee54f7 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -31,7 +31,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Delinearization.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -211,10 +211,9 @@ IndexedReference::hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
return InSameCacheLine;
}
-std::optional<bool>
-IndexedReference::hasTemporalReuse(const IndexedReference &Other,
- unsigned MaxDistance, const Loop &L,
- DependenceInfo &DI, AAResults &AA) const {
+std::optional<bool> IndexedReference::hasTemporalReuse(
+ const IndexedReference &Other, unsigned MaxDistance, const Loop &L,
+ const LoopAccessInfo &LAI, AAResults &AA) const {
assert(IsValid && "Expecting a valid reference");
if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
@@ -223,46 +222,33 @@ IndexedReference::hasTemporalReuse(const IndexedReference &Other,
return false;
}
- std::unique_ptr<Dependence> D =
- DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
-
- if (D == nullptr) {
+ auto DepChecker = LAI.getDepChecker();
+ auto *Deps = DepChecker.getDependences();
+ if (!Deps) {
LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
return false;
}
- if (D->isLoopIndependent()) {
- LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
- return true;
- }
-
- // Check the dependence distance at every loop level. There is temporal reuse
- // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
- // it is zero at every other loop level.
- int LoopDepth = L.getLoopDepth();
- int Levels = D->getLevels();
- for (int Level = 1; Level <= Levels; ++Level) {
- const SCEV *Distance = D->getDistance(Level);
- const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
-
- if (SCEVConst == nullptr) {
- LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
- return std::nullopt;
- }
+ std::optional<MemoryDepChecker::Dependence> D;
+ for (auto &C : *Deps)
+ if (C.getSource(DepChecker) == &StoreOrLoadInst &&
+ C.getDestination(DepChecker) == &Other.StoreOrLoadInst)
+ D = C;
- const ConstantInt &CI = *SCEVConst->getValue();
- if (Level != LoopDepth && !CI.isZero()) {
- LLVM_DEBUG(dbgs().indent(2)
- << "No temporal reuse: distance is not zero at depth=" << Level
- << "\n");
- return false;
- } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
- LLVM_DEBUG(
- dbgs().indent(2)
- << "No temporal reuse: distance is greater than MaxDistance at depth="
- << Level << "\n");
- return false;
- }
+ if (!D || D->Type == MemoryDepChecker::Dependence::NoDep) {
+ LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
+ return false;
+ }
+ if (D->Type == MemoryDepChecker::Dependence::Unknown ||
+ D->Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
+ LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
+ return std::nullopt;
+ }
+ if (DepChecker.getMinDepDist() > MaxDistance) {
+ LLVM_DEBUG(dbgs().indent(2)
+ << "No temporal reuse: distance is greater than MaxDistance"
+ << "\n");
+ return false;
}
LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
@@ -561,10 +547,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
ScalarEvolution &SE, TargetTransformInfo &TTI,
- AAResults &AA, DependenceInfo &DI,
+ AAResults &AA, LoopAccessInfoManager &LAIs,
std::optional<unsigned> TRT)
: Loops(Loops), TRT(TRT.value_or(TemporalReuseThreshold)), LI(LI), SE(SE),
- TTI(TTI), AA(AA), DI(DI) {
+ TTI(TTI), AA(AA), LAIs(LAIs) {
assert(!Loops.empty() && "Expecting a non-empty loop vector.");
for (const Loop *L : Loops) {
@@ -578,7 +564,8 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
std::unique_ptr<CacheCost>
CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
- DependenceInfo &DI, std::optional<unsigned> TRT) {
+ LoopAccessInfoManager &LAIs,
+ std::optional<unsigned> TRT) {
if (!Root.isOutermost()) {
LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
return nullptr;
@@ -593,7 +580,8 @@ CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
return nullptr;
}
- return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
+ return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, LAIs,
+ TRT);
}
void CacheCost::calculateCacheFootprint() {
@@ -654,7 +642,8 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
// should have a cost closer to 2x the second due to the two cache
// access per iteration from opposite ends of the array
std::optional<bool> HasTemporalReuse =
- R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
+ R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop,
+ LAIs.getInfo(*InnerMostLoop), AA);
std::optional<bool> HasSpacialReuse =
R->hasSpacialReuse(Representative, CLS, AA);
@@ -735,10 +724,8 @@ CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function *F = L.getHeader()->getParent();
- DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
-
- if (auto CC = CacheCost::getCacheCost(L, AR, DI))
+ LoopAccessInfoManager LAIs(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
+ if (auto CC = CacheCost::getCacheCost(L, AR, LAIs))
OS << *CC;
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 949296c3db0de2..fcd414efaf954f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1714,10 +1714,11 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
LPMUpdater &U) {
Function &F = *LN.getParent();
- DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+ LoopAccessInfoManager LAIs(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
std::unique_ptr<CacheCost> CC =
- CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
+ CacheCost::getCacheCost(LN.getOutermostLoop(), AR, LAIs);
OptimizationRemarkEmitter ORE(&F);
+ DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
U.markLoopNestChanged(true);
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
index 7275d04c92b472..a828f925ba93a4 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
@@ -114,11 +114,7 @@ for.neg.end: ; preds = %for.neg.cond
; for (int i = 40960; i > 0; i--)
; B[i] = B[40960 - i];
-; FIXME: Currently negative access functions are treated the same as positive
-; access functions. When this is fixed this testcase should have a cost
-; approximately 2x higher.
-
-; CHECK: Loop 'for.cond2' has cost = 2561
+; CHECK: Loop 'for.cond2' has cost = 5122
define void @Test2(ptr %B) {
entry:
br label %for.cond2
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
index 205cd851fce0de..2a989e962b16ba 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
@@ -121,12 +121,8 @@ for.neg.end: ; preds = %for.neg.cond
; for (int i = 40960; i > 0; i--)
; B[i] = B[40960 - i];
-; FIXME: Currently negative access functions are treated the same as positive
-; access functions. When this is fixed this testcase should have a cost
-; approximately 2x higher.
-
-; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10241
-; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1281
+; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 20482
+; LARGER-CACHELINE: Loop 'for.cond2' has cost = 2562
define void @Test2(ptr %B) {
entry:
br label %for.cond2
``````````
</details>
https://github.com/llvm/llvm-project/pull/107691
More information about the llvm-commits
mailing list