[llvm] f6e1d64 - [SLP]Enable interleaved stores support
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 15 08:02:01 PST 2024
Author: Alexey Bataev
Date: 2024-11-15T11:01:57-05:00
New Revision: f6e1d64458130643377511baeec430de67ddddfb
URL: https://github.com/llvm/llvm-project/commit/f6e1d64458130643377511baeec430de67ddddfb
DIFF: https://github.com/llvm/llvm-project/commit/f6e1d64458130643377511baeec430de67ddddfb.diff
LOG: [SLP]Enable interleaved stores support
Enables interleaved stores, resulting in better cost estimation for segmented
stores on RISC-V
Reviewers: preames, topperc, RKSimon
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/115354
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc7a65c0fd70b9..918d7663548f51 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9771,6 +9771,28 @@ void BoUpSLP::transformNodes() {
// Strided store is more profitable than reverse + consecutive store -
// transform the node to strided store.
E.State = TreeEntry::StridedVectorize;
+ } else if (!E.ReorderIndices.empty()) {
+ // Check for interleaved stores.
+ auto IsInterleaveMask = [&, &TTI = *TTI](ArrayRef<int> Mask) {
+ auto *BaseSI = cast<StoreInst>(E.Scalars.front());
+ assert(Mask.size() > 1 && "Expected mask greater than 1 element.");
+ if (Mask.size() < 4)
+ return 0u;
+ for (unsigned Factor : seq<unsigned>(2, Mask.size() / 2 + 1)) {
+ if (ShuffleVectorInst::isInterleaveMask(
+ Mask, Factor, VecTy->getElementCount().getFixedValue()) &&
+ TTI.isLegalInterleavedAccessType(
+ VecTy, Factor, BaseSI->getAlign(),
+ BaseSI->getPointerAddressSpace()))
+ return Factor;
+ }
+
+ return 0u;
+ };
+ SmallVector<int> Mask(E.ReorderIndices.begin(), E.ReorderIndices.end());
+ unsigned InterleaveFactor = IsInterleaveMask(Mask);
+ if (InterleaveFactor != 0)
+ E.setInterleave(InterleaveFactor);
}
break;
}
@@ -11441,10 +11463,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
} else {
assert(E->State == TreeEntry::Vectorize &&
"Expected either strided or consecutive stores.");
- TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
- VecStCost = TTI->getMemoryOpCost(
- Instruction::Store, VecTy, BaseSI->getAlign(),
- BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+ if (unsigned Factor = E->getInterleaveFactor()) {
+ assert(E->ReuseShuffleIndices.empty() && !E->ReorderIndices.empty() &&
+ "No reused shuffles expected");
+ CommonCost = 0;
+ VecStCost = TTI->getInterleavedMemoryOpCost(
+ Instruction::Store, VecTy, Factor, std::nullopt,
+ BaseSI->getAlign(), BaseSI->getPointerAddressSpace(), CostKind);
+ } else {
+ TTI::OperandValueInfo OpInfo = getOperandInfo(E->getOperand(0));
+ VecStCost = TTI->getMemoryOpCost(
+ Instruction::Store, VecTy, BaseSI->getAlign(),
+ BaseSI->getPointerAddressSpace(), CostKind, OpInfo);
+ }
}
return VecStCost + CommonCost;
};
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
index ae1c3e1ee0da20..071d0b972f23a3 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/segmented-stores.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '-1'
+; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '2'
define void @test(ptr %h) {
@@ -17,16 +17,9 @@ define void @test(ptr %h) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[DCT2X211:%.*]] = alloca [0 x [0 x [8 x i64]]], i32 0, align 16
; CHECK-NEXT: [[CHROMA_DC209:%.*]] = getelementptr i8, ptr [[H]], i64 0
-; CHECK-NEXT: [[ARRAYIDX33_I:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 8
-; CHECK-NEXT: [[ARRAYIDX36_I181:%.*]] = getelementptr i8, ptr [[DCT2X211]], i64 24
-; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr align 4 [[DCT2X211]], i64 16, <2 x i1> splat (i1 true), i32 2)
-; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[CHROMA_DC209]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX33_I]], align 2
-; CHECK-NEXT: [[ARRAYIDX5_I226:%.*]] = getelementptr i8, ptr [[H]], i64 16
-; CHECK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX5_I226]], align 2
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX36_I181]], align 2
-; CHECK-NEXT: [[ARRAYIDX7_I228:%.*]] = getelementptr i8, ptr [[H]], i64 24
-; CHECK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX7_I228]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[DCT2X211]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[CHROMA_DC209]], align 2
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits
mailing list