[llvm] [MemDep] Optimize SortNonLocalDepInfoCache sorting strategy for large caches with few unsorted entries (PR #143107)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 10 20:02:48 PDT 2025


https://github.com/DingdWang updated https://github.com/llvm/llvm-project/pull/143107

>From d6e4042439cd525a4eb177695b1aba43cd4e15de Mon Sep 17 00:00:00 2001
From: Dingding <wangdingding at bytedance.com>
Date: Fri, 6 Jun 2025 16:41:50 +0800
Subject: [PATCH 1/3] [MemDep] speed up SortNonLocalDepInfoCache for large
 cache sizes

---
 .../lib/Analysis/MemoryDependenceAnalysis.cpp | 33 +++++++------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index f062189bac6a0..8dffb3e422ed3 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -51,6 +51,7 @@
 #include <cassert>
 #include <iterator>
 #include <utility>
+#include <cmath>
 
 using namespace llvm;
 
@@ -83,6 +84,9 @@ static cl::opt<unsigned>
 // Limit on the number of memdep results to process.
 static const unsigned int NumResultsLimit = 100;
 
+// ln(2). Multiplying log2(x) by this constant gives the natural log ln(x).
+const float ln2 = 0.69314718f;
+
 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
 ///
 /// If the set becomes empty, remove Inst's entry.
@@ -991,33 +995,20 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
 static void
 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                          unsigned NumSortedEntries) {
-  switch (Cache.size() - NumSortedEntries) {
-  case 0:
-    // done, no new entries.
-    break;
-  case 2: {
-    // Two new entries, insert the last one into place.
-    NonLocalDepEntry Val = Cache.back();
-    Cache.pop_back();
-    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
-    Cache.insert(Entry, Val);
-    [[fallthrough]];
-  }
-  case 1:
-    // One new entry, Just insert the new value at the appropriate position.
-    if (Cache.size() != 1) {
+
+  auto s = Cache.size() - NumSortedEntries;
+  if (s < log2(Cache.size()) * ln2) {
+    while (s>0) {
       NonLocalDepEntry Val = Cache.back();
       Cache.pop_back();
       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-          llvm::upper_bound(Cache, Val);
+        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
       Cache.insert(Entry, Val);
+      s--;
     }
-    break;
-  default:
-    // Added many values, do a full scale sort.
+  }
+  else {
     llvm::sort(Cache);
-    break;
   }
 }
 

>From a26cf324935689eeb3b6444395d9e7ab5b010805 Mon Sep 17 00:00:00 2001
From: Dingding <wangdingding at bytedance.com>
Date: Fri, 6 Jun 2025 17:01:07 +0800
Subject: [PATCH 2/3] format

---
 .../lib/Analysis/MemoryDependenceAnalysis.cpp | 32 ++++++++-----------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 8dffb3e422ed3..bd0d6bb18241a 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -49,9 +49,9 @@
 #include "llvm/Support/Debug.h"
 #include <algorithm>
 #include <cassert>
+#include <cmath>
 #include <iterator>
 #include <utility>
-#include <cmath>
 
 using namespace llvm;
 
@@ -373,8 +373,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
     BatchAAResults &BatchAA) {
   bool isInvariantLoad = false;
-  Align MemLocAlign =
-      MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
+  Align MemLocAlign = MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
 
   unsigned DefaultLimit = getDefaultBlockScanLimit();
   if (!Limit)
@@ -422,7 +421,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
   // True for volatile instruction.
   // For Load/Store return true if atomic ordering is stronger than AO,
   // for other instruction just true if it can read or write to memory.
-  auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
+  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
     if (I->isVolatile())
       return true;
     if (auto *LI = dyn_cast<LoadInst>(I))
@@ -465,7 +464,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
       case Intrinsic::masked_load:
       case Intrinsic::masked_store: {
         MemoryLocation Loc;
-        /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
+        /*ModRefInfo MR =*/GetLocation(II, Loc, TLI);
         AliasResult R = BatchAA.alias(Loc, MemLoc);
         if (R == AliasResult::NoAlias)
           continue;
@@ -894,7 +893,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
   // translation.
   SmallDenseMap<BasicBlock *, Value *, 16> Visited;
   if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
-                                   Result, Visited, true))
+                                  Result, Visited, true))
     return;
   Result.clear();
   Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
@@ -998,16 +997,15 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
 
   auto s = Cache.size() - NumSortedEntries;
   if (s < log2(Cache.size()) * ln2) {
-    while (s>0) {
+    while (s > 0) {
       NonLocalDepEntry Val = Cache.back();
       Cache.pop_back();
       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
+          std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
       Cache.insert(Entry, Val);
       s--;
     }
-  }
-  else {
+  } else {
     llvm::sort(Cache);
   }
 }
@@ -1334,8 +1332,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
       // assume it is unknown, but this also does not block PRE of the load.
       if (!CanTranslate ||
           !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
-                                      Loc.getWithNewPtr(PredPtrVal), isLoad,
-                                      Pred, Result, Visited)) {
+                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
+                                       Pred, Result, Visited)) {
         // Add the entry to the Result list.
         NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
         Result.push_back(Entry);
@@ -1403,7 +1401,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
 
         I.setResult(MemDepResult::getUnknown());
 
-
         break;
       }
     }
@@ -1724,9 +1721,7 @@ MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {}
 
 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
 
-void MemoryDependenceWrapperPass::releaseMemory() {
-  MemDep.reset();
-}
+void MemoryDependenceWrapperPass::releaseMemory() { MemDep.reset(); }
 
 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
@@ -1736,8 +1731,9 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
 }
 
-bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
-                               FunctionAnalysisManager::Invalidator &Inv) {
+bool MemoryDependenceResults::invalidate(
+    Function &F, const PreservedAnalyses &PA,
+    FunctionAnalysisManager::Invalidator &Inv) {
   // Check whether our analysis is preserved.
   auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())

>From 1ede44c862519f0ff09ce117f3de0c3f875b8411 Mon Sep 17 00:00:00 2001
From: Dingding <wangdingding at bytedance.com>
Date: Wed, 11 Jun 2025 11:01:49 +0800
Subject: [PATCH 3/3] Revert "format"

This reverts commit a26cf324935689eeb3b6444395d9e7ab5b010805.
---
 .../lib/Analysis/MemoryDependenceAnalysis.cpp | 32 +++++++++++--------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index bd0d6bb18241a..8dffb3e422ed3 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -49,9 +49,9 @@
 #include "llvm/Support/Debug.h"
 #include <algorithm>
 #include <cassert>
-#include <cmath>
 #include <iterator>
 #include <utility>
+#include <cmath>
 
 using namespace llvm;
 
@@ -373,7 +373,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
     BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
     BatchAAResults &BatchAA) {
   bool isInvariantLoad = false;
-  Align MemLocAlign = MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
+  Align MemLocAlign =
+      MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
 
   unsigned DefaultLimit = getDefaultBlockScanLimit();
   if (!Limit)
@@ -421,7 +422,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
   // True for volatile instruction.
   // For Load/Store return true if atomic ordering is stronger than AO,
   // for other instruction just true if it can read or write to memory.
-  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
+  auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
     if (I->isVolatile())
       return true;
     if (auto *LI = dyn_cast<LoadInst>(I))
@@ -464,7 +465,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
       case Intrinsic::masked_load:
       case Intrinsic::masked_store: {
         MemoryLocation Loc;
-        /*ModRefInfo MR =*/GetLocation(II, Loc, TLI);
+        /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
         AliasResult R = BatchAA.alias(Loc, MemLoc);
         if (R == AliasResult::NoAlias)
           continue;
@@ -893,7 +894,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
   // translation.
   SmallDenseMap<BasicBlock *, Value *, 16> Visited;
   if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
-                                  Result, Visited, true))
+                                   Result, Visited, true))
     return;
   Result.clear();
   Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
@@ -997,15 +998,16 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
 
   auto s = Cache.size() - NumSortedEntries;
   if (s < log2(Cache.size()) * ln2) {
-    while (s > 0) {
+    while (s>0) {
       NonLocalDepEntry Val = Cache.back();
       Cache.pop_back();
       MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
-          std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
+        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
       Cache.insert(Entry, Val);
       s--;
     }
-  } else {
+  }
+  else {
     llvm::sort(Cache);
   }
 }
@@ -1332,8 +1334,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
       // assume it is unknown, but this also does not block PRE of the load.
       if (!CanTranslate ||
           !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
-                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
-                                       Pred, Result, Visited)) {
+                                      Loc.getWithNewPtr(PredPtrVal), isLoad,
+                                      Pred, Result, Visited)) {
         // Add the entry to the Result list.
         NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
         Result.push_back(Entry);
@@ -1401,6 +1403,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
 
         I.setResult(MemDepResult::getUnknown());
 
+
         break;
       }
     }
@@ -1721,7 +1724,9 @@ MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {}
 
 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
 
-void MemoryDependenceWrapperPass::releaseMemory() { MemDep.reset(); }
+void MemoryDependenceWrapperPass::releaseMemory() {
+  MemDep.reset();
+}
 
 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
@@ -1731,9 +1736,8 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
 }
 
-bool MemoryDependenceResults::invalidate(
-    Function &F, const PreservedAnalyses &PA,
-    FunctionAnalysisManager::Invalidator &Inv) {
+bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
+                               FunctionAnalysisManager::Invalidator &Inv) {
   // Check whether our analysis is preserved.
   auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())



More information about the llvm-commits mailing list