[llvm] [MemDep] Optimize SortNonLocalDepInfoCache sorting strategy for large caches with few unsorted entries (PR #143107)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 20:02:48 PDT 2025
https://github.com/DingdWang updated https://github.com/llvm/llvm-project/pull/143107
>From d6e4042439cd525a4eb177695b1aba43cd4e15de Mon Sep 17 00:00:00 2001
From: Dingding <wangdingding at bytedance.com>
Date: Fri, 6 Jun 2025 16:41:50 +0800
Subject: [PATCH 1/3] [MemDep] speed up SortNonLocalDepInfoCache with a big
cache size
---
.../lib/Analysis/MemoryDependenceAnalysis.cpp | 33 +++++++------------
1 file changed, 12 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index f062189bac6a0..8dffb3e422ed3 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -51,6 +51,7 @@
#include <cassert>
#include <iterator>
#include <utility>
+#include <cmath>
using namespace llvm;
@@ -83,6 +84,9 @@ static cl::opt<unsigned>
// Limit on the number of memdep results to process.
static const unsigned int NumResultsLimit = 100;
+// ln(2): log2(x) * ln2 == ln(x), the natural logarithm of x.
+const float ln2 = 0.69314718f;
+
/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
///
/// If the set becomes empty, remove Inst's entry.
@@ -991,33 +995,20 @@ MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
unsigned NumSortedEntries) {
- switch (Cache.size() - NumSortedEntries) {
- case 0:
- // done, no new entries.
- break;
- case 2: {
- // Two new entries, insert the last one into place.
- NonLocalDepEntry Val = Cache.back();
- Cache.pop_back();
- MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
- Cache.insert(Entry, Val);
- [[fallthrough]];
- }
- case 1:
- // One new entry, Just insert the new value at the appropriate position.
- if (Cache.size() != 1) {
+
+ auto s = Cache.size() - NumSortedEntries;
+ if (s < log2(Cache.size()) * ln2) {
+ while (s>0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- llvm::upper_bound(Cache, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
+ s--;
}
- break;
- default:
- // Added many values, do a full scale sort.
+ }
+ else {
llvm::sort(Cache);
- break;
}
}
>From a26cf324935689eeb3b6444395d9e7ab5b010805 Mon Sep 17 00:00:00 2001
From: Dingding <wangdingding at bytedance.com>
Date: Fri, 6 Jun 2025 17:01:07 +0800
Subject: [PATCH 2/3] format
---
.../lib/Analysis/MemoryDependenceAnalysis.cpp | 32 ++++++++-----------
1 file changed, 14 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 8dffb3e422ed3..bd0d6bb18241a 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -49,9 +49,9 @@
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
+#include <cmath>
#include <iterator>
#include <utility>
-#include <cmath>
using namespace llvm;
@@ -373,8 +373,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
BatchAAResults &BatchAA) {
bool isInvariantLoad = false;
- Align MemLocAlign =
- MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
+ Align MemLocAlign = MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
unsigned DefaultLimit = getDefaultBlockScanLimit();
if (!Limit)
@@ -422,7 +421,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// True for volatile instruction.
// For Load/Store return true if atomic ordering is stronger than AO,
// for other instruction just true if it can read or write to memory.
- auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
+ auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
if (I->isVolatile())
return true;
if (auto *LI = dyn_cast<LoadInst>(I))
@@ -465,7 +464,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
case Intrinsic::masked_load:
case Intrinsic::masked_store: {
MemoryLocation Loc;
- /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
+ /*ModRefInfo MR =*/GetLocation(II, Loc, TLI);
AliasResult R = BatchAA.alias(Loc, MemLoc);
if (R == AliasResult::NoAlias)
continue;
@@ -894,7 +893,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
// translation.
SmallDenseMap<BasicBlock *, Value *, 16> Visited;
if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
- Result, Visited, true))
+ Result, Visited, true))
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
@@ -998,16 +997,15 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
auto s = Cache.size() - NumSortedEntries;
if (s < log2(Cache.size()) * ln2) {
- while (s>0) {
+ while (s > 0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
s--;
}
- }
- else {
+ } else {
llvm::sort(Cache);
}
}
@@ -1334,8 +1332,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
!getNonLocalPointerDepFromBB(QueryInst, PredPointer,
- Loc.getWithNewPtr(PredPtrVal), isLoad,
- Pred, Result, Visited)) {
+ Loc.getWithNewPtr(PredPtrVal), isLoad,
+ Pred, Result, Visited)) {
// Add the entry to the Result list.
NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
Result.push_back(Entry);
@@ -1403,7 +1401,6 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
I.setResult(MemDepResult::getUnknown());
-
break;
}
}
@@ -1724,9 +1721,7 @@ MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {}
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
-void MemoryDependenceWrapperPass::releaseMemory() {
- MemDep.reset();
-}
+void MemoryDependenceWrapperPass::releaseMemory() { MemDep.reset(); }
void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -1736,8 +1731,9 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
-bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
- FunctionAnalysisManager::Invalidator &Inv) {
+bool MemoryDependenceResults::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
// Check whether our analysis is preserved.
auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
>From 1ede44c862519f0ff09ce117f3de0c3f875b8411 Mon Sep 17 00:00:00 2001
From: Dingding <wangdingding at bytedance.com>
Date: Wed, 11 Jun 2025 11:01:49 +0800
Subject: [PATCH 3/3] Revert "format"
This reverts commit a26cf324935689eeb3b6444395d9e7ab5b010805.
---
.../lib/Analysis/MemoryDependenceAnalysis.cpp | 32 +++++++++++--------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index bd0d6bb18241a..8dffb3e422ed3 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -49,9 +49,9 @@
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
-#include <cmath>
#include <iterator>
#include <utility>
+#include <cmath>
using namespace llvm;
@@ -373,7 +373,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
BatchAAResults &BatchAA) {
bool isInvariantLoad = false;
- Align MemLocAlign = MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
+ Align MemLocAlign =
+ MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
unsigned DefaultLimit = getDefaultBlockScanLimit();
if (!Limit)
@@ -421,7 +422,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// True for volatile instruction.
// For Load/Store return true if atomic ordering is stronger than AO,
// for other instruction just true if it can read or write to memory.
- auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
+ auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
if (I->isVolatile())
return true;
if (auto *LI = dyn_cast<LoadInst>(I))
@@ -464,7 +465,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
case Intrinsic::masked_load:
case Intrinsic::masked_store: {
MemoryLocation Loc;
- /*ModRefInfo MR =*/GetLocation(II, Loc, TLI);
+ /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
AliasResult R = BatchAA.alias(Loc, MemLoc);
if (R == AliasResult::NoAlias)
continue;
@@ -893,7 +894,7 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
// translation.
SmallDenseMap<BasicBlock *, Value *, 16> Visited;
if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
- Result, Visited, true))
+ Result, Visited, true))
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
@@ -997,15 +998,16 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
auto s = Cache.size() - NumSortedEntries;
if (s < log2(Cache.size()) * ln2) {
- while (s > 0) {
+ while (s>0) {
NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
- std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
+ std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
s--;
}
- } else {
+ }
+ else {
llvm::sort(Cache);
}
}
@@ -1332,8 +1334,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
!getNonLocalPointerDepFromBB(QueryInst, PredPointer,
- Loc.getWithNewPtr(PredPtrVal), isLoad,
- Pred, Result, Visited)) {
+ Loc.getWithNewPtr(PredPtrVal), isLoad,
+ Pred, Result, Visited)) {
// Add the entry to the Result list.
NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
Result.push_back(Entry);
@@ -1401,6 +1403,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
I.setResult(MemDepResult::getUnknown());
+
break;
}
}
@@ -1721,7 +1724,9 @@ MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {}
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
-void MemoryDependenceWrapperPass::releaseMemory() { MemDep.reset(); }
+void MemoryDependenceWrapperPass::releaseMemory() {
+ MemDep.reset();
+}
void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -1731,9 +1736,8 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
-bool MemoryDependenceResults::invalidate(
- Function &F, const PreservedAnalyses &PA,
- FunctionAnalysisManager::Invalidator &Inv) {
+bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
// Check whether our analysis is preserved.
auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
More information about the llvm-commits
mailing list