[llvm] [RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. (PR #111459)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 17 01:01:04 PDT 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/111459
>From b9d2d1764bca4e3534acc41d3b4031a6a0f757f8 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Sat, 5 Oct 2024 14:58:44 +0800
Subject: [PATCH 1/2] [RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector
mask is multiple insert subvector.
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 50 +++++++++++++++++++
.../Target/RISCV/RISCVTargetTransformInfo.h | 6 +++
.../RISCV/fixed-vector-insert-subvector.ll | 18 +++++++
.../RISCV/remarks-insert-into-small-vector.ll | 2 +-
.../RISCV/revec-getGatherCost.ll | 4 +-
5 files changed, 77 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8d18fd63e4a2e1..3ff9831678a9fc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -343,6 +343,52 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
/*AddressSpace=*/0, CostKind);
}
+InstructionCost
+RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<FixedVectorType>(Tp))
+ return InstructionCost::getInvalid();
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+ if (LT.second.getScalarSizeInBits() == 1)
+ return InstructionCost::getInvalid();
+ unsigned Size = Mask.size();
+ if (!isPowerOf2_32(Size))
+ return InstructionCost::getInvalid();
+ // Try to guess subvector size.
+ unsigned SubVecSize;
+ for (unsigned I = 0; I != Size; ++I) {
+ if (static_cast<unsigned>(Mask[I]) == I)
+ continue;
+ if (Mask[I] == 0) {
+ SubVecSize = I;
+ break;
+ }
+ return InstructionCost::getInvalid();
+ }
+ if (Size % SubVecSize != 0)
+ return InstructionCost::getInvalid();
+ for (unsigned I = 0; I != Size; ++I)
+ if (static_cast<unsigned>(Mask[I]) != I % SubVecSize)
+ return InstructionCost::getInvalid();
+ InstructionCost Cost = 0;
+ unsigned NumSlides = Log2_32(Size / SubVecSize);
+ // The cost of extraction from a subvector is 0 if the index is 0.
+ for (unsigned I = 0; I != NumSlides; ++I) {
+ unsigned InsertIndex = SubVecSize * (1 << I);
+ FixedVectorType *SubTp =
+ FixedVectorType::get(Tp->getElementType(), InsertIndex);
+ FixedVectorType *DestTp =
+ FixedVectorType::getDoubleElementsVectorType(SubTp);
+ std::pair<InstructionCost, MVT> DestLT = getTypeLegalizationCost(DestTp);
+ // Add the cost of whole vector register move because the destination vector
+ // register group for vslideup cannot overlap the source.
+ Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, DestTp, {}, CostKind,
+ InsertIndex, SubTp);
+ }
+ return Cost;
+}
+
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
LLVMContext &C) {
assert((DataVT.getScalarSizeInBits() != 8 ||
@@ -394,6 +440,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second, CostKind);
}
}
+ if (InstructionCost Cost =
+ isMultipleInsertSubvector(Tp, Mask, CostKind);
+ Cost.isValid())
+ return Cost;
}
// vrgather + cost of generating the mask constant.
// We model this for an unknown mask with a single vrgather.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 3f50bd86b9b3b6..66c5b8c2681417 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,6 +55,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
TTI::TargetCostKind CostKind);
+
+ /// Return the cost if a shufflevector can be consist of multiple vslideup.
+ /// Otherwise, return InstructionCost::getInvalid().
+ InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind);
+
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
new file mode 100644
index 00000000000000..47a2af92aee950
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+ %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
index 09612444afd205..4788e1ef715593 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '0'
+; YAML-NEXT: - Cost: '-2'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '9'
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 995cd7cfbc880b..a0cb52a853b7e6 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -8,7 +8,7 @@
; YAML: Function: test1
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '6'
+; YAML: - Cost: '4'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
@@ -47,7 +47,7 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
; YAML: Function: test2
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '16'
+; YAML: - Cost: '12'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
>From 39c1368f9cfa6540da97bc2f2bc4f3d60b5e7ac2 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Thu, 17 Oct 2024 00:33:05 -0700
Subject: [PATCH 2/2] apply comment
---
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 77 +++++++++----------
.../Target/RISCV/RISCVTargetTransformInfo.h | 6 --
2 files changed, 36 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 3ff9831678a9fc..32b72dfed4c045 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -343,50 +343,26 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
/*AddressSpace=*/0, CostKind);
}
-InstructionCost
-RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
- TTI::TargetCostKind CostKind) {
- if (!isa<FixedVectorType>(Tp))
- return InstructionCost::getInvalid();
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
- if (LT.second.getScalarSizeInBits() == 1)
- return InstructionCost::getInvalid();
+static bool isRepeatedConcatMaskImpl(ArrayRef<int> Mask, int &SubVectorSize) {
unsigned Size = Mask.size();
if (!isPowerOf2_32(Size))
- return InstructionCost::getInvalid();
- // Try to guess subvector size.
- unsigned SubVecSize;
+ return false;
for (unsigned I = 0; I != Size; ++I) {
if (static_cast<unsigned>(Mask[I]) == I)
continue;
- if (Mask[I] == 0) {
- SubVecSize = I;
- break;
- }
- return InstructionCost::getInvalid();
- }
- if (Size % SubVecSize != 0)
- return InstructionCost::getInvalid();
- for (unsigned I = 0; I != Size; ++I)
- if (static_cast<unsigned>(Mask[I]) != I % SubVecSize)
- return InstructionCost::getInvalid();
- InstructionCost Cost = 0;
- unsigned NumSlides = Log2_32(Size / SubVecSize);
- // The cost of extraction from a subvector is 0 if the index is 0.
- for (unsigned I = 0; I != NumSlides; ++I) {
- unsigned InsertIndex = SubVecSize * (1 << I);
- FixedVectorType *SubTp =
- FixedVectorType::get(Tp->getElementType(), InsertIndex);
- FixedVectorType *DestTp =
- FixedVectorType::getDoubleElementsVectorType(SubTp);
- std::pair<InstructionCost, MVT> DestLT = getTypeLegalizationCost(DestTp);
- // Add the cost of whole vector register move because the destination vector
- // register group for vslideup cannot overlap the source.
- Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
- Cost += getShuffleCost(TTI::SK_InsertSubvector, DestTp, {}, CostKind,
- InsertIndex, SubTp);
+ if (Mask[I] != 0)
+ return false;
+ if (Size % I != 0)
+ return false;
+ for (unsigned J = 0; J != Size; ++J)
+ // Check the pattern is repeated.
+ if (static_cast<unsigned>(Mask[J]) != J % I)
+ return false;
+ SubVectorSize = I;
+ return true;
}
- return Cost;
+ // Mask is an identity mask, e.g. <0, 1, 2, 3>. This is not a concatenation.
+ return false;
}
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
@@ -440,10 +416,29 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second, CostKind);
}
}
- if (InstructionCost Cost =
- isMultipleInsertSubvector(Tp, Mask, CostKind);
- Cost.isValid())
+ int SubVectorSize;
+ if (LT.second.getScalarSizeInBits() != 1 &&
+ isRepeatedConcatMaskImpl(Mask, SubVectorSize)) {
+ InstructionCost Cost = 0;
+ unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
+ // The cost of extraction from a subvector is 0 if the index is 0.
+ for (unsigned I = 0; I != NumSlides; ++I) {
+ unsigned InsertIndex = SubVectorSize * (1 << I);
+ FixedVectorType *SubTp =
+ FixedVectorType::get(Tp->getElementType(), InsertIndex);
+ FixedVectorType *DestTp =
+ FixedVectorType::getDoubleElementsVectorType(SubTp);
+ std::pair<InstructionCost, MVT> DestLT =
+ getTypeLegalizationCost(DestTp);
+ // Add the cost of whole vector register move because the
+ // destination vector register group for vslideup cannot overlap the
+ // source.
+ Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, DestTp, {},
+ CostKind, InsertIndex, SubTp);
+ }
return Cost;
+ }
}
// vrgather + cost of generating the mask constant.
// We model this for an unknown mask with a single vrgather.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 66c5b8c2681417..3f50bd86b9b3b6 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,12 +55,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
TTI::TargetCostKind CostKind);
-
- /// Return the cost if a shufflevector can be consist of multiple vslideup.
- /// Otherwise, return InstructionCost::getInvalid().
- InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
- TTI::TargetCostKind CostKind);
-
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
More information about the llvm-commits mailing list