[llvm] [CostModel] getInstructionCost - improve estimation of costs for length changing shuffles (PR #84156)

Wed Mar 6 03:56:41 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

Fix gap in the cost estimation for length changing shuffles, by widening/narrowing the shuffle mask and either widening the shuffle inputs or extracting the lower elements of the result

A small step towards moving some of this implementation inside improveShuffleKindFromMask and/or target getShuffleCost handlers (and reduce the diffs in cost estimation depending on whether coming from a ShuffleVectorInst or the raw operands / mask components)

---

Patch is 351.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84156.diff


12 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+37-8) 
- (modified) llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll (+184-184) 
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll (+2-2) 
- (modified) llvm/test/Analysis/CostModel/RISCV/shuffle-interleave.ll (+2-2) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-codesize.ll (+106-64) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-latency.ll (+110-68) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector-sizelatency.ll (+106-64) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-concat_subvector.ll (+110-68) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll (+26-13) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll (+27-14) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll (+26-13) 
- (modified) llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll (+27-14) 


``````````diff

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 95fb13d1c97154..71939088bef641 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1326,28 +1326,29 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
       int NumSubElts, SubIndex;
 
+      // TODO: move more of this inside improveShuffleKindFromMask.
       if (Shuffle->changesLength()) {
+        ArrayRef<int> Mask = Shuffle->getShuffleMask();
+
         // Treat a 'subvector widening' as a free shuffle.
         if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
           return 0;
 
         if (Shuffle->isExtractSubvectorMask(SubIndex))
           return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
-                                           Shuffle->getShuffleMask(), CostKind,
-                                           SubIndex, VecTy, Operands);
+                                           Mask, CostKind, SubIndex, VecTy,
+                                           Operands);
 
         if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
           return TargetTTI->getShuffleCost(
-              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
-              CostKind, SubIndex,
+              TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
               FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
               Operands);
 
         int ReplicationFactor, VF;
         if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
-          APInt DemandedDstElts =
-              APInt::getZero(Shuffle->getShuffleMask().size());
-          for (auto I : enumerate(Shuffle->getShuffleMask())) {
+          APInt DemandedDstElts = APInt::getZero(Mask.size());
+          for (auto I : enumerate(Mask)) {
             if (I.value() != PoisonMaskElem)
               DemandedDstElts.setBit(I.index());
           }
@@ -1356,7 +1357,35 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
               DemandedDstElts, CostKind);
         }
 
-        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
+        bool IsUnary = isa<UndefValue>(Operands[1]);
+        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
+        SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());
+
+        // Widening shuffle - widening the source(s) to the new length
+        // (treated as free - see above), and then perform the adjusted
+        // shuffle at that width.
+        if (Shuffle->increasesLength()) {
+          for (int &M : AdjustMask)
+            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
+
+          return TargetTTI->getShuffleCost(
+              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
+              AdjustMask, CostKind, 0, nullptr);
+        }
+
+        // Narrowing shuffle - perform shuffle at original wider width and
+        // then extract the lower elements.
+        AdjustMask.append(NumSubElts - Mask.size(), -1);
+
+        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
+            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
+            VecSrcTy, AdjustMask, CostKind, 0, nullptr);
+
+        SmallVector<int, 16> ExtractMask(Mask.size(), -1);
+        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
+        return ShuffleCost + TargetTTI->getShuffleCost(
+                                 TTI::SK_ExtractSubvector, VecTy, ExtractMask,
+                                 CostKind, 0, VecSrcTy, Operands);
       }
 
       if (Shuffle->isIdentity())
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
index 58f9dd3633e2c4..be5cca0765edf1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
@@ -25,27 +25,27 @@ define amdgpu_kernel void @shufflevector_i16() {
 ; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
 ; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; VI-LABEL: 'shufflevector_i16'
@@ -65,27 +65,27 @@ define amdgpu_kernel void @shufflevector_i16() {
 ; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-10-SIZE-LABEL: 'shufflevector_i16'
@@ -105,27 +105,27 @@ define amdgpu_kernel void @shufflevector_i16() {
 ; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
 ; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0,...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/84156