[llvm] 8e7461e - [LoadStoreVectorizer] Batch alias analysis results to improve compile time (#147555)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 09:23:36 PDT 2025
Author: Drew Kersnar
Date: 2025-07-10T11:23:33-05:00
New Revision: 8e7461e29a7c9f1721758b30eb99b0ccab45a7cd
URL: https://github.com/llvm/llvm-project/commit/8e7461e29a7c9f1721758b30eb99b0ccab45a7cd
DIFF: https://github.com/llvm/llvm-project/commit/8e7461e29a7c9f1721758b30eb99b0ccab45a7cd.diff
LOG: [LoadStoreVectorizer] Batch alias analysis results to improve compile time (#147555)
This should be generally good for a lot of LSV cases, but the attached
test demonstrates a specific compile time issue that appears in the
event where the `CaptureTracking` default max uses is raised.
Without using batching alias analysis, this test takes 6 seconds to
compile in a release build. With it, compilation takes less than a second.
This is because the mechanism that proves `NoAlias` in this case is very
expensive (`CaptureTracking.cpp`), and caching the result leads to 2 calls to
that mechanism instead of ~300,000 (run with -stats to see the difference).
This test only demonstrates the compile time issue if
`capture-tracking-max-uses-to-explore` is set to at least 1024, because
with the default value of 100, the `CaptureTracking` analysis is not
run, `NoAlias` is not proven, and the vectorizer gives up early.
Added:
llvm/test/Transforms/LoadStoreVectorizer/batch-aa-compile-time.ll
Modified:
llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 89f63c3b66aad..7b5137b0185ab 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -322,7 +322,8 @@ class Vectorizer {
template <bool IsLoadChain>
bool isSafeToMove(
Instruction *ChainElem, Instruction *ChainBegin,
- const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);
+ const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
+ BatchAAResults &BatchAA);
/// Merges the equivalence classes if they have underlying objects that differ
/// by one level of indirection (i.e., one is a getelementptr and the other is
@@ -543,6 +544,10 @@ std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
for (const auto &E : C)
ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});
+ // Across a single invocation of this function the IR is not changing, so
+ // using a batched Alias Analysis is safe and can reduce compile time.
+ BatchAAResults BatchAA(AA);
+
// Loads get hoisted up to the first load in the chain. Stores get sunk
// down to the last store in the chain. Our algorithm for loads is:
//
@@ -569,7 +574,7 @@ std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
NewChain.emplace_back(*ChainBegin);
for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
- ChainOffsets)) {
+ ChainOffsets, BatchAA)) {
LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
<< *ChainIt->Inst << " into " << *ChainBegin->Inst
<< "\n");
@@ -999,7 +1004,8 @@ bool Vectorizer::vectorizeChain(Chain &C) {
template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
Instruction *ChainElem, Instruction *ChainBegin,
- const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets) {
+ const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
+ BatchAAResults &BatchAA) {
LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
<< *ChainBegin << ")\n");
@@ -1066,7 +1072,8 @@ bool Vectorizer::isSafeToMove(
LLVM_DEBUG({
// Double check that AA also sees this alias. If not, we probably
// have a bug.
- ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
+ ModRefInfo MR =
+ BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
dbgs() << "LSV: Found alias in chain: " << *I << "\n";
});
@@ -1077,7 +1084,7 @@ bool Vectorizer::isSafeToMove(
}
LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
- ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
+ ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
<< " Aliasing instruction:\n"
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/batch-aa-compile-time.ll b/llvm/test/Transforms/LoadStoreVectorizer/batch-aa-compile-time.ll
new file mode 100644
index 0000000000000..049c9a3f70e0f
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/batch-aa-compile-time.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S < %s -passes='loop-unroll,load-store-vectorizer' -unroll-count=128 --capture-tracking-max-uses-to-explore=1024 | FileCheck %s
+
+; Without using batching alias analysis, this test takes 6 seconds to compile. With, less than a second.
+; This is because the mechanism that proves NoAlias in this case is very expensive (CaptureTracking.cpp),
+; and caching the result leads to 2 calls to that mechanism instead of ~300,000 (run with -stats to see the difference)
+
+; This test only demonstrates the compile time issue if capture-tracking-max-uses-to-explore is set to at least 1024,
+; because with the default value of 100, the CaptureTracking analysis is not run, NoAlias is not proven, and the vectorizer gives up early.
+
+@global_mem = external global i8, align 4
+
+define void @compile-time-test() {
+; CHECK-LABEL: define void @compile-time-test() {
+; CHECK-COUNT-128: load <4 x i8>
+entry:
+ ; Create base pointer to a global variable with the inefficient pattern that Alias Analysis cannot easily traverse through.
+ %global_base_loads = getelementptr i8, ptr inttoptr (i32 ptrtoint (ptr @global_mem to i32) to ptr), i64 0
+
+ ; Create another pointer for the stores.
+ %local_base_stores = alloca <512 x i8>, align 4
+
+ ; 512 interwoven loads and stores in a loop that gets unrolled
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i_next, %loop ]
+
+ %ptr_0 = getelementptr i8, ptr %global_base_loads, i64 %i
+ %load_0 = load i8, ptr %ptr_0, align 4
+ %ptr2_0 = getelementptr i8, ptr %local_base_stores, i64 %i
+ store i8 %load_0, ptr %ptr2_0, align 4
+
+ %i_1 = add i64 %i, 1
+
+ %ptr_1 = getelementptr i8, ptr %global_base_loads, i64 %i_1
+ %load_1 = load i8, ptr %ptr_1, align 1
+ %ptr2_1 = getelementptr i8, ptr %local_base_stores, i64 %i_1
+ store i8 %load_1, ptr %ptr2_1, align 1
+
+ %i_2 = add i64 %i, 2
+
+ %ptr_2 = getelementptr i8, ptr %global_base_loads, i64 %i_2
+ %load_2 = load i8, ptr %ptr_2, align 2
+ %ptr2_2 = getelementptr i8, ptr %local_base_stores, i64 %i_2
+ store i8 %load_2, ptr %ptr2_2, align 2
+
+ %i_3 = add i64 %i, 3
+
+ %ptr_3 = getelementptr i8, ptr %global_base_loads, i64 %i_3
+ %load_3 = load i8, ptr %ptr_3, align 1
+ %ptr2_3 = getelementptr i8, ptr %local_base_stores, i64 %i_3
+ store i8 %load_3, ptr %ptr2_3, align 1
+
+ %i_next = add i64 %i, 4
+ %cmp = icmp ult i64 %i_next, 512
+ br i1 %cmp, label %loop, label %done
+
+done:
+ ret void
+}
More information about the llvm-commits
mailing list