[llvm] [SLP][REVEC] Make getExtractWithExtendCost support FixedVectorType as Dst. (PR #134822)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 8 06:29:52 PDT 2025
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/134822
>From 35a1a410d31cbcf93d75d79d81c302ecd585d18c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Apr 2025 02:27:59 -0700
Subject: [PATCH 1/3] [SLP][REVEC] Pre-commit test.
---
.../X86/revec-getExtractWithExtendCost.ll | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
new file mode 100644
index 0000000000000..a3664c6d2ab99
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+
+define void @test() {
+entry:
+ %0 = sub <8 x i64> zeroinitializer, splat (i64 1)
+ %1 = sub <8 x i64> zeroinitializer, zeroinitializer
+ %2 = or <8 x i64> %0, zeroinitializer
+ %3 = trunc <8 x i64> %0 to <8 x i32>
+ %4 = trunc <8 x i64> %1 to <8 x i32>
+ %5 = getelementptr i8, ptr null, i64 32
+ store <8 x i32> %3, ptr null, align 4
+ store <8 x i32> %4, ptr %5, align 4
+ ret void
+}
>From 7aae441a88b97dff6819c37c680d9d662e38587e Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Apr 2025 02:25:39 -0700
Subject: [PATCH 2/3] [SLP][REVEC] Make getExtractWithExtendCost support
FixedVectorType as Dst.
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 27 ++++++++++++++++---
.../X86/revec-getExtractWithExtendCost.ll | 12 +++++++++
2 files changed, 36 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6559f26be8c2..6a96d6e40674c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5399,6 +5399,25 @@ static InstructionCost getVectorInstrCost(
ScalarUserAndIdx);
}
+/// This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst
+/// is a FixedVectorType, a vector will be extracted instead of a scalar.
+static InstructionCost getExtractWithExtendCost(const TargetTransformInfo &TTI,
+ unsigned Opcode, Type *Dst,
+ VectorType *VecTy,
+ unsigned Index) {
+ if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
+ assert(SLPReVec && "Only supported by REVEC.");
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ auto *SubTp =
+ getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
+ return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
+ Index * ScalarTy->getNumElements(), SubTp) +
+ TTI.getCastInstrCost(Opcode, Dst, SubTp, TTI::CastContextHint::None,
+ CostKind);
+ }
+ return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+}
+
/// Correctly creates insert_subvector, checking that the index is multiple of
/// the subvectors length. Otherwise, generates shuffle using \p Generator or
/// using default shuffle.
@@ -14088,13 +14107,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
const TreeEntry *Entry = &EU.E;
auto It = MinBWs.find(Entry);
if (It != MinBWs.end()) {
- auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
+ Type *MinTy = IntegerType::get(F->getContext(), It->second.first);
+ if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy))
+ MinTy = getWidenedType(MinTy, VecTy->getNumElements());
unsigned Extend = isKnownNonNegative(EU.Scalar, SimplifyQuery(*DL))
? Instruction::ZExt
: Instruction::SExt;
VecTy = getWidenedType(MinTy, BundleWidth);
- ExtraCost = TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
- VecTy, EU.Lane);
+ ExtraCost =
+ getExtractWithExtendCost(*TTI, Extend, ScalarTy, VecTy, EU.Lane);
} else {
ExtraCost =
getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
index a3664c6d2ab99..8d3d9f2979298 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getExtractWithExtendCost.ll
@@ -2,6 +2,18 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx10.2-512 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = sub <8 x i64> zeroinitializer, splat (i64 1)
+; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i64> zeroinitializer, zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP0]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr null, i64 32
+; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4
+; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP5]], align 4
+; CHECK-NEXT: ret void
+;
entry:
%0 = sub <8 x i64> zeroinitializer, splat (i64 1)
%1 = sub <8 x i64> zeroinitializer, zeroinitializer
>From a2a0d6d4dd167ed654a9518840af0161d463fb2c Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Apr 2025 06:29:37 -0700
Subject: [PATCH 3/3] apply comment
---
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6a96d6e40674c..a916792a4b4fd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5401,13 +5401,12 @@ static InstructionCost getVectorInstrCost(
/// This is similar to TargetTransformInfo::getExtractWithExtendCost, but if Dst
/// is a FixedVectorType, a vector will be extracted instead of a scalar.
-static InstructionCost getExtractWithExtendCost(const TargetTransformInfo &TTI,
- unsigned Opcode, Type *Dst,
- VectorType *VecTy,
- unsigned Index) {
+static InstructionCost getExtractWithExtendCost(
+ const TargetTransformInfo &TTI, unsigned Opcode, Type *Dst,
+ VectorType *VecTy, unsigned Index,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
if (auto *ScalarTy = dyn_cast<FixedVectorType>(Dst)) {
assert(SLPReVec && "Only supported by REVEC.");
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto *SubTp =
getWidenedType(VecTy->getElementType(), ScalarTy->getNumElements());
return getShuffleCost(TTI, TTI::SK_ExtractSubvector, VecTy, {}, CostKind,
More information about the llvm-commits mailing list