[llvm] [CostModel] getInstructionCost - improve estimation of costs for length changing shuffles (PR #84156)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 03:56:41 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Fix gap in the cost estimation for length changing shuffles, by widening/narrowing the shuffle mask and either widening the shuffle inputs or extracting the lower elements of the result
A small step towards moving some of this implementation inside improveShuffleKindFromMask and/or target getShuffleCost handlers (and reduce the diffs in cost estimation depending on whether coming from a ShuffleVectorInst or the raw operands / mask components)
---
Patch is 351.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84156.diff
12 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+37-8)
- (modified) llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll (+184-184)
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll (+2-2)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll (+106-64)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll (+110-68)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll (+106-64)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll (+110-68)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll (+26-13)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll (+27-14)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll (+26-13)
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll (+27-14)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 95fb13d1c97154..71939088bef641 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1326,28 +1326,29 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
int NumSubElts, SubIndex;
+ // TODO: move more of this inside improveShuffleKindFromMask.
if (Shuffle->changesLength()) {
+ ArrayRef<int> Mask = Shuffle->getShuffleMask();
+
// Treat a 'subvector widening' as a free shuffle.
if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
return 0;
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
- Shuffle->getShuffleMask(), CostKind,
- SubIndex, VecTy, Operands);
+ Mask, CostKind, SubIndex, VecTy,
+ Operands);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
- TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
- CostKind, SubIndex,
+ TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
Operands);
int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
- APInt DemandedDstElts =
- APInt::getZero(Shuffle->getShuffleMask().size());
- for (auto I : enumerate(Shuffle->getShuffleMask())) {
+ APInt DemandedDstElts = APInt::getZero(Mask.size());
+ for (auto I : enumerate(Mask)) {
if (I.value() != PoisonMaskElem)
DemandedDstElts.setBit(I.index());
}
@@ -1356,7 +1357,35 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
DemandedDstElts, CostKind);
}
- return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
+ bool IsUnary = isa<UndefValue>(Operands[1]);
+ NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
+ SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());
+
+ // Widening shuffle - widening the source(s) to the new length
+ // (treated as free - see above), and then perform the adjusted
+ // shuffle at that width.
+ if (Shuffle->increasesLength()) {
+ for (int &M : AdjustMask)
+ M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
+
+ return TargetTTI->getShuffleCost(
+ IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
+ AdjustMask, CostKind, 0, nullptr);
+ }
+
+ // Narrowing shuffle - perform shuffle at original wider width and
+ // then extract the lower elements.
+ AdjustMask.append(NumSubElts - Mask.size(), -1);
+
+ InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
+ IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
+ VecSrcTy, AdjustMask, CostKind, 0, nullptr);
+
+ SmallVector<int, 16> ExtractMask(Mask.size(), -1);
+ std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
+ return ShuffleCost + TargetTTI->getShuffleCost(
+ TTI::SK_ExtractSubvector, VecTy, ExtractMask,
+ CostKind, 0, VecSrcTy, Operands);
}
if (Shuffle->isIdentity())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
index 58f9dd3633e2c4..be5cca0765edf1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
@@ -25,27 +25,27 @@ define amdgpu_kernel void @shufflevector_i16() {
; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX9-10-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
-; GFX9-10-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; GFX9-10-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
; GFX9-10-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; VI-LABEL: 'shufflevector_i16'
@@ -65,27 +65,27 @@ define amdgpu_kernel void @shufflevector_i16() {
; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
; VI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
-; VI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; VI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; VI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
; VI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX9-10-SIZE-LABEL: 'shufflevector_i16'
@@ -105,27 +105,27 @@ define amdgpu_kernel void @shufflevector_i16() {
; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
-; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
-; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
-; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
-; GFX9-10-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0,...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/84156
More information about the llvm-commits
mailing list