[llvm] [SLP]Enable interleaved stores support (PR #115354)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 08:01:48 PST 2024
https://github.com/alexey-bataev updated https://github.com/llvm/llvm-project/pull/115354
>From 88c8eae3602e61b1b65457fcddd649645e2b8f93 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Thu, 7 Nov 2024 18:05:25 +0000
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 40 +++++++++++++++++--
.../SLPVectorizer/RISCV/segmented-stores.ll | 15 ++-----
2 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 184413b420089a..bff0684a2e6f16 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9759,6 +9759,29 @@ void BoUpSLP::transformNodes() {
// Strided store is more profitable than reverse + consecutive store -
// transform the node to strided store.
E.State = TreeEntry::StridedVectorize;
+ } else if (!E.ReorderIndices.empty()) {
+ // Check for interleaved stores.
+ auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
+ assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
+ if (Mask.size() < 4)
+ return 0u;
+ for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
+ if (ShuffleVectorInst::isInterleaveMask(
+ Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
+ TTI.isLegalInterleavedAccessType(
+ VecTy, Factor,
+ cast<StoreInst>(E.Scalars.front())->getAlign(),
+ cast<StoreInst>(E.Scalars.front())
+ ->getPointerAddressSpace()))
+ return Factor;
+ }
+
+ return 0u;
+ };
+ SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
+ unsigned InterleaveFactor = IsInterleaveMask(Mask);
+ if (InterleaveFactor != 0)
+ E.setInterleave(InterleaveFactor);
}
break;
}
@@ -11428,10 +11451,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");
- TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
- VecStCost = TTI->getMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getAlign(),
- BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+ if (unsigned Factor = E->getInterleaveFactor()) {
+ assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
+ "No reused shuffles expected");
+ CommonCost = 0;
+ VecStCost = TTI->getInterleavedMemoryOpCost(
+ Instruction::Store, VecTy, Factor, std::nullopt,
+ BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
+ } else {
+ TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
+ VecStCost = TTI->getMemoryOpCost(
+ Instruction::Store, VecTy, BaseSI->getAlign(),
+ BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+ }
}
return VecStCost + CommonCost;
};
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
index ae1c3e1ee0da20..071d0b972f23a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '-1'
+; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
define void @test(ptr %h) {
@@ -17,16 +17,9 @@ define void @test(ptr %h) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DCT2X211:%.*]] = alloca [0 x [0 x [8 x i64]]], i32 0, align 16
; CHECK-NEXT: [[CHROMA_DC209:%.*]] = getelementptr i8, ptr [[H]], i64 0
-; CHECK-NEXT: [[ARRAYIDX33_I:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 8
-; CHECK-NEXT: [[ARRAYIDX36_I181:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 24
-; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 4 [[DCT2X211]], i64 16, <2 x i1> splat (i1 true), i32 2)
-; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[CHROMA_DC209]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX33_I]], align 2
-; CHECK-NEXT: [[ARRAYIDX5_I226:%.*]] = getelementptr i8, ptr [[H]], i64 16
-; CHECK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX5_I226]], align 2
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX36_I181]], align 2
-; CHECK-NEXT: [[ARRAYIDX7_I228:%.*]] = getelementptr i8, ptr [[H]], i64 24
-; CHECK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX7_I228]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[DCT2X211]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[CHROMA_DC209]], align 2
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits mailing list