[llvm] 1fcf78d - [SLP]Cache data for compressed loads before codegen
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 08:43:53 PDT 2025
Author: Alexey Bataev
Date: 2025-04-17T08:43:44-07:00
New Revision: 1fcf78d15318e3fbe384c635a0ae651be89d0686
URL: https://github.com/llvm/llvm-project/commit/1fcf78d15318e3fbe384c635a0ae651be89d0686
DIFF: https://github.com/llvm/llvm-project/commit/1fcf78d15318e3fbe384c635a0ae651be89d0686.diff
LOG: [SLP]Cache data for compressed loads before codegen
Need to cache and use cached data for compressed loads before codegen to
avoid side effects caused by the earlier vectorization, which may
affect the analysis.
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/masked-loads-side-effects-after-vec.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fd23fb6c81c2c..0cf89ea4a9dc8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1889,6 +1889,7 @@ class BoUpSLP {
LoadEntriesToVectorize.clear();
IsGraphTransformMode = false;
GatheredLoadsEntriesFirst.reset();
+ CompressEntryToData.clear();
ExternalUses.clear();
ExternalUsesAsOriginalScalar.clear();
for (auto &Iter : BlocksSchedules) {
@@ -4308,6 +4309,11 @@ class BoUpSLP {
/// The index of the first gathered load entry in the VectorizeTree.
std::optional<unsigned> GatheredLoadsEntriesFirst;
+ /// Maps compress entries to their mask data for the final codegen.
+ SmallDenseMap<const TreeEntry *,
+ std::tuple<SmallVector<int>, VectorType *, unsigned, bool>>
+ CompressEntryToData;
+
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, int L)
@@ -13428,6 +13434,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
*TLI, [](Value *) { return true; }, IsMasked, InterleaveFactor,
CompressMask, LoadVecTy);
assert(IsVectorized && "Expected to be vectorized");
+ CompressEntryToData.try_emplace(E, CompressMask, LoadVecTy,
+ InterleaveFactor, IsMasked);
Align CommonAlignment;
if (IsMasked)
CommonAlignment = computeCommonAlignment<LoadInst>(VL);
@@ -17963,10 +17971,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (E->State == TreeEntry::Vectorize) {
NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
} else if (E->State == TreeEntry::CompressVectorize) {
- bool IsMasked;
- unsigned InterleaveFactor;
- SmallVector<int> CompressMask;
- VectorType *LoadVecTy;
SmallVector<Value *> Scalars(E->Scalars.begin(), E->Scalars.end());
if (!E->ReorderIndices.empty()) {
SmallVector<int> Mask(E->ReorderIndices.begin(),
@@ -17976,11 +17980,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<Value *> PointerOps(Scalars.size());
for (auto [I, V] : enumerate(Scalars))
PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
- [[maybe_unused]] bool IsVectorized = isMaskedLoadCompress(
- Scalars, PointerOps, E->ReorderIndices, *TTI, *DL, *SE, *AC, *DT,
- *TLI, [](Value *) { return true; }, IsMasked, InterleaveFactor,
- CompressMask, LoadVecTy);
- assert(IsVectorized && "Expected to be vectorized");
+ auto [CompressMask, LoadVecTy, InterleaveFactor, IsMasked] =
+ CompressEntryToData.at(E);
Align CommonAlignment;
if (IsMasked)
CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/masked-loads-side-effects-after-vec.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/masked-loads-side-effects-after-vec.ll
new file mode 100644
index 0000000000000..ca3c8bbac6366
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/masked-loads-side-effects-after-vec.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+declare noalias ptr @malloc()
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT: [[TMP1:%.*]] = call dereferenceable_or_null(16) ptr @malloc()
+; CHECK-NEXT: [[TMP2:%.*]] = load volatile ptr, ptr null, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <15 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <15 x i8> [[TMP3]], <15 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: store <8 x i8> [[TMP4]], ptr [[TMP2]], align 1
+; CHECK-NEXT: ret void
+;
+ %1 = call dereferenceable_or_null(16) ptr @malloc()
+ %2 = load volatile ptr, ptr null, align 8
+ %3 = load i8, ptr %1, align 1
+ store i8 %3, ptr %2, align 1
+ %4 = getelementptr i8, ptr %1, i64 2
+ %5 = load i8, ptr %4, align 1
+ %6 = getelementptr i8, ptr %2, i64 1
+ store i8 %5, ptr %6, align 1
+ %7 = getelementptr i8, ptr %1, i64 4
+ %8 = load i8, ptr %7, align 1
+ %9 = getelementptr i8, ptr %2, i64 2
+ store i8 %8, ptr %9, align 1
+ %10 = getelementptr i8, ptr %1, i64 6
+ %11 = load i8, ptr %10, align 1
+ %12 = getelementptr i8, ptr %2, i64 3
+ store i8 %11, ptr %12, align 1
+ %13 = getelementptr i8, ptr %1, i64 8
+ %14 = load i8, ptr %13, align 1
+ %15 = getelementptr i8, ptr %2, i64 4
+ store i8 %14, ptr %15, align 1
+ %16 = getelementptr i8, ptr %1, i64 10
+ %17 = load i8, ptr %16, align 1
+ %18 = getelementptr i8, ptr %2, i64 5
+ store i8 %17, ptr %18, align 1
+ %19 = getelementptr i8, ptr %1, i64 12
+ %20 = load i8, ptr %19, align 1
+ %21 = getelementptr i8, ptr %2, i64 6
+ store i8 %20, ptr %21, align 1
+ %22 = getelementptr i8, ptr %1, i64 14
+ %23 = load i8, ptr %22, align 1
+ %24 = getelementptr i8, ptr %2, i64 7
+ store i8 %23, ptr %24, align 1
+ ret void
+}
More information about the llvm-commits
mailing list