[llvm] [RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. (PR #111459)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 08:44:42 PDT 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/111459
>From 0ff75ffad5e7c3a44040b04bbf3cf4392f35f184 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sat, 5 Oct 2024 14:58:44 +0800
Subject: [PATCH 1/4] [RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector
mask is multiple insert subvector. (#110457)
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 47 +++++++++++++++++++
.../Target/RISCV/RISCVTargetTransformInfo.h | 6 +++
.../RISCV/fixed-vector-insert-subvector.ll | 18 +++++++
.../RISCV/remarks-insert-into-small-vector.ll | 2 +-
.../RISCV/revec-getGatherCost.ll | 4 +-
5 files changed, 74 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index a61461681f79ed..1e5321a9ace41b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -343,6 +343,49 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
/*AddressSpace=*/0, CostKind);
}
+InstructionCost
+RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<FixedVectorType>(Tp))
+ return InstructionCost::getInvalid();
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+ if (LT.second.getScalarSizeInBits() == 1)
+ return InstructionCost::getInvalid();
+ // Try to guess SubTp.
+ for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E;
+ SubVecSize <<= 1) {
+ if (E % SubVecSize != 0)
+ continue;
+ SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0));
+ bool Skip = false;
+ for (unsigned I = 0; I != E; I += SubVecSize)
+ if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) {
+ Skip = true;
+ break;
+ }
+ if (Skip)
+ continue;
+ InstructionCost Cost = 0;
+ unsigned NumSlides = Log2_32(E / SubVecSize);
+ // The cost of extraction from a subvector is 0 if the index is 0.
+ for (unsigned I = 0; I != NumSlides; ++I) {
+ unsigned InsertIndex = SubVecSize * (1 << I);
+ FixedVectorType *SubTp = FixedVectorType::get(
+ cast<FixedVectorType>(Tp)->getElementType(), InsertIndex);
+ FixedVectorType *DesTp =
+ FixedVectorType::getDoubleElementsVectorType(SubTp);
+ std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp);
+ // Add the cost of whole vector register move because the destination
+ // vector register group for vslideup cannot overlap the source.
+ Cost += DesLT.first * TLI->getLMULCost(DesLT.second);
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind,
+ InsertIndex, SubTp);
+ }
+ return Cost;
+ }
+ return InstructionCost::getInvalid();
+}
+
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
LLVMContext &C) {
assert((DataVT.getScalarSizeInBits() != 8 ||
@@ -394,6 +437,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second, CostKind);
}
}
+ if (InstructionCost Cost =
+ isMultipleInsertSubvector(Tp, Mask, CostKind);
+ Cost.isValid())
+ return Cost;
}
// vrgather + cost of generating the mask constant.
// We model this for an unknown mask with a single vrgather.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 65bbd905508557..9d6317baabc658 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,6 +55,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
TTI::TargetCostKind CostKind);
+
+ /// Return the cost if a shufflevector can be composed of multiple vslideups.
+ /// Otherwise, return InstructionCost::getInvalid().
+ InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind);
+
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
new file mode 100644
index 00000000000000..47a2af92aee950
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+ %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
index bb806be15c71ca..23a9a654c96f9e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '2'
+; YAML-NEXT: - Cost: '0'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '7'
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 995cd7cfbc880b..a0cb52a853b7e6 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -8,7 +8,7 @@
; YAML: Function: test1
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '6'
+; YAML: - Cost: '4'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
@@ -47,7 +47,7 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
; YAML: Function: test2
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '16'
+; YAML: - Cost: '12'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
>From 153b94b5c21e2e76e793a1508ee9aef9ccea9926 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 7 Oct 2024 17:53:31 -0700
Subject: [PATCH 2/4] Apply comments.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 1e5321a9ace41b..4229114d58bfdf 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -351,14 +351,14 @@ RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
if (LT.second.getScalarSizeInBits() == 1)
return InstructionCost::getInvalid();
+ unsigned Size = Mask.size();
+ if (!isPowerOf2_32(Size))
+ return InstructionCost::getInvalid();
// Try to guess SubTp.
- for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E;
- SubVecSize <<= 1) {
- if (E % SubVecSize != 0)
- continue;
+ for (unsigned SubVecSize = 1; SubVecSize < Size; SubVecSize <<= 1) {
SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0));
bool Skip = false;
- for (unsigned I = 0; I != E; I += SubVecSize)
+ for (unsigned I = 0; I != Size; I += SubVecSize)
if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) {
Skip = true;
break;
@@ -366,12 +366,12 @@ RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
if (Skip)
continue;
InstructionCost Cost = 0;
- unsigned NumSlides = Log2_32(E / SubVecSize);
+ unsigned NumSlides = Log2_32(Size / SubVecSize);
// The cost of extraction from a subvector is 0 if the index is 0.
for (unsigned I = 0; I != NumSlides; ++I) {
unsigned InsertIndex = SubVecSize * (1 << I);
- FixedVectorType *SubTp = FixedVectorType::get(
- cast<FixedVectorType>(Tp)->getElementType(), InsertIndex);
+ FixedVectorType *SubTp =
+ FixedVectorType::get(Tp->getElementType(), InsertIndex);
FixedVectorType *DesTp =
FixedVectorType::getDoubleElementsVectorType(SubTp);
std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp);
>From bd815854fd7e3f8100dc008a8054a511833c6ce3 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Mon, 7 Oct 2024 20:49:38 -0700
Subject: [PATCH 3/4] Apply comments.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 55 ++++++++++---------
1 file changed, 28 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 4229114d58bfdf..8d5e685f3ea55b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -354,36 +354,37 @@ RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
unsigned Size = Mask.size();
if (!isPowerOf2_32(Size))
return InstructionCost::getInvalid();
- // Try to guess SubTp.
- for (unsigned SubVecSize = 1; SubVecSize < Size; SubVecSize <<= 1) {
- SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0));
- bool Skip = false;
- for (unsigned I = 0; I != Size; I += SubVecSize)
- if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) {
- Skip = true;
- break;
- }
- if (Skip)
+ // Try to guess subvector size.
+ unsigned SubVecSize;
+ for (unsigned I = 0; I != Size; ++I) {
+ if (static_cast<unsigned>(Mask[I]) == I)
continue;
- InstructionCost Cost = 0;
- unsigned NumSlides = Log2_32(Size / SubVecSize);
- // The cost of extraction from a subvector is 0 if the index is 0.
- for (unsigned I = 0; I != NumSlides; ++I) {
- unsigned InsertIndex = SubVecSize * (1 << I);
- FixedVectorType *SubTp =
- FixedVectorType::get(Tp->getElementType(), InsertIndex);
- FixedVectorType *DesTp =
- FixedVectorType::getDoubleElementsVectorType(SubTp);
- std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp);
- // Add the cost of whole vector register move because the destination
- // vector register group for vslideup cannot overlap the source.
- Cost += DesLT.first * TLI->getLMULCost(DesLT.second);
- Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind,
- InsertIndex, SubTp);
+ if (Mask[I] == 0) {
+ SubVecSize = I;
+ break;
}
- return Cost;
+ return InstructionCost::getInvalid();
}
- return InstructionCost::getInvalid();
+ for (unsigned I = 0; I != Size; ++I)
+ if (static_cast<unsigned>(Mask[I]) != I % SubVecSize)
+ return InstructionCost::getInvalid();
+ InstructionCost Cost = 0;
+ unsigned NumSlides = Log2_32(Size / SubVecSize);
+ // The cost of extraction from a subvector is 0 if the index is 0.
+ for (unsigned I = 0; I != NumSlides; ++I) {
+ unsigned InsertIndex = SubVecSize * (1 << I);
+ FixedVectorType *SubTp =
+ FixedVectorType::get(Tp->getElementType(), InsertIndex);
+ FixedVectorType *DesTp =
+ FixedVectorType::getDoubleElementsVectorType(SubTp);
+ std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp);
+ // Add the cost of whole vector register move because the destination vector
+ // register group for vslideup cannot overlap the source.
+ Cost += DesLT.first * TLI->getLMULCost(DesLT.second);
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind,
+ InsertIndex, SubTp);
+ }
+ return Cost;
}
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
>From e29eb38471eeb0aaad7a590fd63dcc338e1ee9df Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Tue, 8 Oct 2024 08:44:22 -0700
Subject: [PATCH 4/4] Apply comments.
---
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8d5e685f3ea55b..352dcfb6f1ffa6 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -365,6 +365,8 @@ RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
}
return InstructionCost::getInvalid();
}
+ if (Size % SubVecSize != 0)
+ return InstructionCost::getInvalid();
for (unsigned I = 0; I != Size; ++I)
if (static_cast<unsigned>(Mask[I]) != I % SubVecSize)
return InstructionCost::getInvalid();
@@ -375,13 +377,13 @@ RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
unsigned InsertIndex = SubVecSize * (1 << I);
FixedVectorType *SubTp =
FixedVectorType::get(Tp->getElementType(), InsertIndex);
- FixedVectorType *DesTp =
+ FixedVectorType *DestTp =
FixedVectorType::getDoubleElementsVectorType(SubTp);
- std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp);
+ std::pair<InstructionCost, MVT> DestLT = getTypeLegalizationCost(DestTp);
// Add the cost of whole vector register move because the destination vector
// register group for vslideup cannot overlap the source.
- Cost += DesLT.first * TLI->getLMULCost(DesLT.second);
- Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind,
+ Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, DestTp, {}, CostKind,
InsertIndex, SubTp);
}
return Cost;
More information about the llvm-commits
mailing list