[llvm] LCA: migrate from DA to LAA; fix cost computation (PR #107691)

via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 7 04:43:32 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>

LoopCacheAnalysis currently uses DependenceAnalysis, which hasn't been worked on for years. Migrate it to use the newer and actively-maintaiend LoopAccessAnalysis, fixing a long-standing cost-computation issue, due to an underlying bug in DependenceAnalysis.

---
Full diff: https://github.com/llvm/llvm-project/pull/107691.diff


6 Files Affected:

- (modified) llvm/include/llvm/Analysis/LoopAccessAnalysis.h (+2) 
- (modified) llvm/include/llvm/Analysis/LoopCacheAnalysis.h (+8-4) 
- (modified) llvm/lib/Analysis/LoopCacheAnalysis.cpp (+36-49) 
- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+3-2) 
- (modified) llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll (+1-5) 
- (modified) llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll (+2-6) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 73d9c26ed6b1b7..41f027811c8802 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -270,6 +270,8 @@ class MemoryDepChecker {
     return PointerBounds;
   }
 
+  uint64_t getMinDepDist() const { return MinDepDistBytes; }
+
 private:
   /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
   /// applies dynamic knowledge to simplify SCEV expressions and convert them
diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
index 4fd2485e39d6db..edd13a413ee9ea 100644
--- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
 #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
 
+#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/IR/PassManager.h"
 #include <optional>
@@ -82,7 +83,8 @@ class IndexedReference {
   /// MaxDistance and std::nullopt if unsure.
   std::optional<bool> hasTemporalReuse(const IndexedReference &Other,
                                        unsigned MaxDistance, const Loop &L,
-                                       DependenceInfo &DI, AAResults &AA) const;
+                                       const LoopAccessInfo &LAI,
+                                       AAResults &AA) const;
 
   /// Compute the cost of the reference w.r.t. the given loop \p L when it is
   /// considered in the innermost position in the loop nest.
@@ -199,7 +201,8 @@ class CacheCost {
   /// between array elements accessed in a loop so that the elements are
   /// classified to have temporal reuse.
   CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
-            TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI,
+            TargetTransformInfo &TTI, AAResults &AA,
+            LoopAccessInfoManager &LAIs,
             std::optional<unsigned> TRT = std::nullopt);
 
   /// Create a CacheCost for the loop nest rooted by \p Root.
@@ -207,7 +210,8 @@ class CacheCost {
   /// between array elements accessed in a loop so that the elements are
   /// classified to have temporal reuse.
   static std::unique_ptr<CacheCost>
-  getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
+  getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
+               LoopAccessInfoManager &LAIs,
                std::optional<unsigned> TRT = std::nullopt);
 
   /// Return the estimated cost of loop \p L if the given loop is part of the
@@ -276,7 +280,7 @@ class CacheCost {
   ScalarEvolution &SE;
   TargetTransformInfo &TTI;
   AAResults &AA;
-  DependenceInfo &DI;
+  LoopAccessInfoManager &LAIs;
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 7ca9f15ad5fca0..ff83626eee54f7 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -31,7 +31,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Delinearization.h"
-#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -211,10 +211,9 @@ IndexedReference::hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
   return InSameCacheLine;
 }
 
-std::optional<bool>
-IndexedReference::hasTemporalReuse(const IndexedReference &Other,
-                                   unsigned MaxDistance, const Loop &L,
-                                   DependenceInfo &DI, AAResults &AA) const {
+std::optional<bool> IndexedReference::hasTemporalReuse(
+    const IndexedReference &Other, unsigned MaxDistance, const Loop &L,
+    const LoopAccessInfo &LAI, AAResults &AA) const {
   assert(IsValid && "Expecting a valid reference");
 
   if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
@@ -223,46 +222,33 @@ IndexedReference::hasTemporalReuse(const IndexedReference &Other,
     return false;
   }
 
-  std::unique_ptr<Dependence> D =
-      DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
-
-  if (D == nullptr) {
+  auto DepChecker = LAI.getDepChecker();
+  auto *Deps = DepChecker.getDependences();
+  if (!Deps) {
     LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
     return false;
   }
 
-  if (D->isLoopIndependent()) {
-    LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
-    return true;
-  }
-
-  // Check the dependence distance at every loop level. There is temporal reuse
-  // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
-  // it is zero at every other loop level.
-  int LoopDepth = L.getLoopDepth();
-  int Levels = D->getLevels();
-  for (int Level = 1; Level <= Levels; ++Level) {
-    const SCEV *Distance = D->getDistance(Level);
-    const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
-
-    if (SCEVConst == nullptr) {
-      LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
-      return std::nullopt;
-    }
+  std::optional<MemoryDepChecker::Dependence> D;
+  for (auto &C : *Deps)
+    if (C.getSource(DepChecker) == &StoreOrLoadInst &&
+        C.getDestination(DepChecker) == &Other.StoreOrLoadInst)
+      D = C;
 
-    const ConstantInt &CI = *SCEVConst->getValue();
-    if (Level != LoopDepth && !CI.isZero()) {
-      LLVM_DEBUG(dbgs().indent(2)
-                 << "No temporal reuse: distance is not zero at depth=" << Level
-                 << "\n");
-      return false;
-    } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
-      LLVM_DEBUG(
-          dbgs().indent(2)
-          << "No temporal reuse: distance is greater than MaxDistance at depth="
-          << Level << "\n");
-      return false;
-    }
+  if (!D || D->Type == MemoryDepChecker::Dependence::NoDep) {
+    LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
+    return false;
+  }
+  if (D->Type == MemoryDepChecker::Dependence::Unknown ||
+      D->Type == MemoryDepChecker::Dependence::IndirectUnsafe) {
+    LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
+    return std::nullopt;
+  }
+  if (DepChecker.getMinDepDist() > MaxDistance) {
+    LLVM_DEBUG(dbgs().indent(2)
+               << "No temporal reuse: distance is greater than MaxDistance"
+               << "\n");
+    return false;
   }
 
   LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
@@ -561,10 +547,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
 
 CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
                      ScalarEvolution &SE, TargetTransformInfo &TTI,
-                     AAResults &AA, DependenceInfo &DI,
+                     AAResults &AA, LoopAccessInfoManager &LAIs,
                      std::optional<unsigned> TRT)
     : Loops(Loops), TRT(TRT.value_or(TemporalReuseThreshold)), LI(LI), SE(SE),
-      TTI(TTI), AA(AA), DI(DI) {
+      TTI(TTI), AA(AA), LAIs(LAIs) {
   assert(!Loops.empty() && "Expecting a non-empty loop vector.");
 
   for (const Loop *L : Loops) {
@@ -578,7 +564,8 @@ CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
 
 std::unique_ptr<CacheCost>
 CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
-                        DependenceInfo &DI, std::optional<unsigned> TRT) {
+                        LoopAccessInfoManager &LAIs,
+                        std::optional<unsigned> TRT) {
   if (!Root.isOutermost()) {
     LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
     return nullptr;
@@ -593,7 +580,8 @@ CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
     return nullptr;
   }
 
-  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
+  return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, LAIs,
+                                     TRT);
 }
 
 void CacheCost::calculateCacheFootprint() {
@@ -654,7 +642,8 @@ bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
        // should have a cost closer to 2x the second due to the two cache
        // access per iteration from opposite ends of the array
         std::optional<bool> HasTemporalReuse =
-            R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
+            R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop,
+                                LAIs.getInfo(*InnerMostLoop), AA);
         std::optional<bool> HasSpacialReuse =
             R->hasSpacialReuse(Representative, CLS, AA);
 
@@ -735,10 +724,8 @@ CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
 PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
                                             LoopStandardAnalysisResults &AR,
                                             LPMUpdater &U) {
-  Function *F = L.getHeader()->getParent();
-  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
-
-  if (auto CC = CacheCost::getCacheCost(L, AR, DI))
+  LoopAccessInfoManager LAIs(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
+  if (auto CC = CacheCost::getCacheCost(L, AR, LAIs))
     OS << *CC;
 
   return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 949296c3db0de2..fcd414efaf954f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1714,10 +1714,11 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
                                            LPMUpdater &U) {
   Function &F = *LN.getParent();
 
-  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
+  LoopAccessInfoManager LAIs(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr);
   std::unique_ptr<CacheCost> CC =
-      CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
+      CacheCost::getCacheCost(LN.getOutermostLoop(), AR, LAIs);
   OptimizationRemarkEmitter ORE(&F);
+  DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
   if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
     return PreservedAnalyses::all();
   U.markLoopNestChanged(true);
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
index 7275d04c92b472..a828f925ba93a4 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll
@@ -114,11 +114,7 @@ for.neg.end:                                          ; preds = %for.neg.cond
 ;   for (int i = 40960; i > 0; i--)
 ;     B[i] = B[40960 - i];
 
-; FIXME: Currently negative access functions are treated the same as positive
-; access functions. When this is fixed this testcase should have a cost
-; approximately 2x higher.
-
-; CHECK: Loop 'for.cond2' has cost = 2561
+; CHECK: Loop 'for.cond2' has cost = 5122
 define void @Test2(ptr %B) {
 entry:
   br label %for.cond2
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
index 205cd851fce0de..2a989e962b16ba 100644
--- a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
+++ b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll
@@ -121,12 +121,8 @@ for.neg.end:                                          ; preds = %for.neg.cond
 ;   for (int i = 40960; i > 0; i--)
 ;     B[i] = B[40960 - i];
 
-; FIXME: Currently negative access functions are treated the same as positive
-; access functions. When this is fixed this testcase should have a cost
-; approximately 2x higher.
-
-; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 10241
-; LARGER-CACHELINE: Loop 'for.cond2' has cost = 1281
+; SMALLER-CACHELINE: Loop 'for.cond2' has cost = 20482
+; LARGER-CACHELINE: Loop 'for.cond2' has cost = 2562
 define void @Test2(ptr %B) {
 entry:
   br label %for.cond2

``````````

</details>


https://github.com/llvm/llvm-project/pull/107691


More information about the llvm-commits mailing list