[llvm] ab05ab5 - [CostModel][AMDGPU] Fix instructions costs estimation for vector types.

Thu Dec 2 16:08:24 PST 2021

Author: Daniil Fukalov
Date: 2021-12-03T03:08:08+03:00
New Revision: ab05ab59a7bd22889fe19389431676b3414c6b7d

URL: https://github.com/llvm/llvm-project/commit/ab05ab59a7bd22889fe19389431676b3414c6b7d
DIFF: https://github.com/llvm/llvm-project/commit/ab05ab59a7bd22889fe19389431676b3414c6b7d.diff

LOG: [CostModel][AMDGPU] Fix instructions costs estimation for vector types.

1. Fixed vector instructions costs estimations incosistency - removed different
   logic for "not simple types" since it biases costs for these types.
2. Fixed legalization penalty for vectors too big for the target: changed from
   overwrite default legalization cost value estimation to added penalty.
3. Fixed few typos in tests.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D114893

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
    llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
    llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
    llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
    llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
    llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
    llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
    llvm/test/Analysis/CostModel/AMDGPU/mul.ll
    llvm/test/Analysis/CostModel/AMDGPU/shifts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index ecdbdf613a534..4a31a598fc789 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -519,57 +519,6 @@ InstructionCost GCNTTIImpl::getArithmeticInstrCost(
     TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
     const Instruction *CxtI) {
-  EVT OrigTy = TLI->getValueType(DL, Ty);
-  if (!OrigTy.isSimple()) {
-    // FIXME: We're having to query the throughput cost so that the basic
-    // implementation tries to generate legalize and scalarization costs. Maybe
-    // we could hoist the scalarization code here?
-    if (CostKind != TTI::TCK_CodeSize)
-      return BaseT::getArithmeticInstrCost(Opcode, Ty, TTI::TCK_RecipThroughput,
-                                           Opd1Info, Opd2Info, Opd1PropInfo,
-                                           Opd2PropInfo, Args, CxtI);
-    // Scalarization
-
-    // Check if any of the operands are vector operands.
-    int ISD = TLI->InstructionOpcodeToISD(Opcode);
-    assert(ISD && "Invalid opcode");
-
-    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
-
-    bool IsFloat = Ty->isFPOrFPVectorTy();
-    // Assume that floating point arithmetic operations cost twice as much as
-    // integer operations.
-    unsigned OpCost = (IsFloat ? 2 : 1);
-
-    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
-      // The operation is legal. Assume it costs 1.
-      // TODO: Once we have extract/insert subvector cost we need to use them.
-      return LT.first * OpCost;
-    }
-
-    if (!TLI->isOperationExpand(ISD, LT.second)) {
-      // If the operation is custom lowered, then assume that the code is twice
-      // as expensive.
-      return LT.first * 2 * OpCost;
-    }
-
-    // Else, assume that we need to scalarize this op.
-    // TODO: If one of the types get legalized by splitting, handle this
-    // similarly to what getCastInstrCost() does.
-    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
-      unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
-      InstructionCost Cost = getArithmeticInstrCost(
-          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
-          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
-      // Return the cost of multiple scalar invocation plus the cost of
-      // inserting and extracting the values.
-      SmallVector<Type *> Tys(Args.size(), Ty);
-      return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
-    }
-
-    // We don't know anything about this scalar instruction.
-    return OpCost;
-  }
 
   // Legalize the type.
   std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35b72f5d201ba..76d9cdb6469ca 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12477,6 +12477,6 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
   if (Size <= 256)
     return Cost;
 
-  Cost.first = (Size + 255) / 256;
+  Cost.first += (Size + 255) / 256;
   return Cost;
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 7b70eb20132cf..2bd77e252ed01 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @add_i32() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i32'
@@ -27,7 +27,7 @@ define amdgpu_kernel void @add_i32() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9i32 = add <9 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i32 = add i32 undef, undef
@@ -38,7 +38,7 @@ define amdgpu_kernel void @add_i32() #0 {
   %v6i32 = add <6 x i32> undef, undef
   %v7i32 = add <7 x i32> undef, undef
   %v8i32 = add <8 x i32> undef, undef
-  %v32i32 = add <32 x i32> undef, undef
+  %v9i32 = add <9 x i32> undef, undef
   ret void
 }
 
@@ -48,11 +48,7 @@ define amdgpu_kernel void @add_i64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i64'
@@ -60,11 +56,7 @@ define amdgpu_kernel void @add_i64() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = add <5 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i64 = add i64 undef, undef
@@ -72,10 +64,6 @@ define amdgpu_kernel void @add_i64() #0 {
   %v3i64 = add <3 x i64> undef, undef
   %v4i64 = add <4 x i64> undef, undef
   %v5i64 = add <5 x i64> undef, undef
-  %v6i64 = add <6 x i64> undef, undef
-  %v7i64 = add <7 x i64> undef, undef
-  %v8i64 = add <8 x i64> undef, undef
-  %v16i64 = add <16 x i64> undef, undef
   ret void
 }
 
@@ -87,6 +75,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'add_i16'
@@ -96,6 +86,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'add_i16'
@@ -105,6 +97,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'add_i16'
@@ -114,6 +108,8 @@ define amdgpu_kernel void @add_i16() #0 {
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = add <16 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = add i16 undef, undef
@@ -122,6 +118,8 @@ define amdgpu_kernel void @add_i16() #0 {
   %v4i16 = add <4 x i16> undef, undef
   %v5i16 = add <5 x i16> undef, undef
   %v6i16 = add <6 x i16> undef, undef
+  %v16i16 = add <16 x i16> undef, undef
+  %v17i16 = add <17 x i16> undef, undef
   ret void
 }
 
@@ -133,6 +131,8 @@ define amdgpu_kernel void @add_i8() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i8'
@@ -142,6 +142,8 @@ define amdgpu_kernel void @add_i8() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8 = add <32 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v33i8 = add <33 x i8> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8 = add i8 undef, undef
@@ -150,12 +152,14 @@ define amdgpu_kernel void @add_i8() #0 {
   %v4i8 = add <4 x i8> undef, undef
   %v5i8 = add <5 x i8> undef, undef
   %v6i8 = add <6 x i8> undef, undef
+  %v32i8 = add <32 x i8> undef, undef
+  %v33i8 = add <33 x i8> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @sub() #0 {
 ; FAST16-LABEL: 'sub'
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -165,7 +169,7 @@ define amdgpu_kernel void @sub() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'sub'
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -175,7 +179,7 @@ define amdgpu_kernel void @sub() #0 {
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'sub'
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -185,7 +189,7 @@ define amdgpu_kernel void @sub() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'sub'
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
@@ -194,7 +198,7 @@ define amdgpu_kernel void @sub() #0 {
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %i8 = sub i16 undef, undef
+  %i8 = sub i8 undef, undef
   %i16 = sub i16 undef, undef
   %i32 = sub i32 undef, undef
   %i64 = sub i64 undef, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
index 4a01b1bd677c1..50164bc708bf1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
@@ -10,6 +10,8 @@ define amdgpu_kernel void @fabs_f32() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fabs_f32'
@@ -18,6 +20,8 @@ define amdgpu_kernel void @fabs_f32() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = call float @llvm.fabs.f32(float undef) #1
@@ -25,6 +29,8 @@ define amdgpu_kernel void @fabs_f32() #0 {
   %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #1
   %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #1
   %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #1
+  %v8f32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef) #1
+  %v9f32 = call <9 x float> @llvm.fabs.v9f32(<9 x float> undef) #1
   ret void
 }
 
@@ -34,6 +40,7 @@ define amdgpu_kernel void @fabs_f64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fabs_f64'
@@ -41,12 +48,14 @@ define amdgpu_kernel void @fabs_f64() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = call double @llvm.fabs.f64(double undef) #1
   %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #1
   %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #1
   %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #1
+  %v5f64 = call <5 x double> @llvm.fabs.v5f64(<5 x double> undef) #1
   ret void
 }
 
@@ -57,6 +66,8 @@ define amdgpu_kernel void @fabs_f16() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fabs_f16'
@@ -65,6 +76,8 @@ define amdgpu_kernel void @fabs_f16() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = call half @llvm.fabs.f16(half undef) #1
@@ -72,6 +85,8 @@ define amdgpu_kernel void @fabs_f16() #0 {
   %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #1
   %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #1
   %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #1
+  %v16f16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef) #1
+  %v17f16 = call <17 x half> @llvm.fabs.v17f16(<17 x half> undef) #1
   ret void
 }
 
@@ -80,17 +95,22 @@ declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1
 declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1
 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
 declare <5 x float> @llvm.fabs.v5f32(<5 x float>) #1
+declare <8 x float> @llvm.fabs.v8f32(<8 x float>) #1
+declare <9 x float> @llvm.fabs.v9f32(<9 x float>) #1
 
 declare double @llvm.fabs.f64(double) #1
 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1
 declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1
 declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1
+declare <5 x double> @llvm.fabs.v5f64(<5 x double>) #1
 
 declare half @llvm.fabs.f16(half) #1
 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
 declare <3 x half> @llvm.fabs.v3f16(<3 x half>) #1
 declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
 declare <5 x half> @llvm.fabs.v5f16(<5 x half>) #1
+declare <16 x half> @llvm.fabs.v16f16(<16 x half>) #1
+declare <17 x half> @llvm.fabs.v17f16(<17 x half>) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
index 2decec00badd3..12760920733d1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
@@ -14,6 +14,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOPACKEDF32-LABEL: 'fadd_f32'
@@ -22,6 +24,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f32'
@@ -30,6 +34,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fadd <9 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; NOPACKEDF32-SIZE-LABEL: 'fadd_f32'
@@ -38,6 +44,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fadd <8 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fadd <9 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fadd float undef, undef
@@ -45,6 +53,8 @@ define amdgpu_kernel void @fadd_f32() #0 {
   %v3f32 = fadd <3 x float> undef, undef
   %v4f32 = fadd <4 x float> undef, undef
   %v5f32 = fadd <5 x float> undef, undef
+  %v8f32 = fadd <8 x float> undef, undef
+  %v9f32 = fadd <9 x float> undef, undef
   ret void
 }
 
@@ -54,7 +64,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF64-LABEL: 'fadd_f64'
@@ -62,7 +72,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fadd_f64'
@@ -70,7 +80,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fadd <3 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f64'
@@ -78,7 +88,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; NOPACKEDF32-SIZE-LABEL: 'fadd_f64'
@@ -86,7 +96,7 @@ define amdgpu_kernel void @fadd_f64() #0 {
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fadd double undef, undef
@@ -103,7 +113,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fadd_f16'
@@ -111,7 +123,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF16-SIZE-LABEL: 'fadd_f16'
@@ -119,7 +133,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fadd_f16'
@@ -127,7 +143,9 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
-; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fadd <16 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fadd half undef, undef
@@ -135,6 +153,8 @@ define amdgpu_kernel void @fadd_f16() #0 {
   %v3f16 = fadd <3 x half> undef, undef
   %v4f16 = fadd <4 x half> undef, undef
   %v5f16 = fadd <5 x half> undef, undef
+  %v16f16 = fadd <16 x half> undef, undef
+  %v17f16 = fadd <17 x half> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index ff300dd209cd9..310598da87d7e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -19,6 +19,8 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fdiv_f32_ieee'
@@ -27,6 +29,8 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v9f32 = fdiv <9 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fdiv float undef, undef
@@ -34,6 +38,8 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
   %v3f32 = fdiv <3 x float> undef, undef
   %v4f32 = fdiv <4 x float> undef, undef
   %v5f32 = fdiv <5 x float> undef, undef
+  %v8f32 = fdiv <8 x float> undef, undef
+  %v9f32 = fdiv <9 x float> undef, undef
   ret void
 }
 
@@ -44,6 +50,8 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 768 for instruction: %v9f32 = fdiv <9 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fdiv_f32_ftzdaz'
@@ -52,6 +60,8 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v8f32 = fdiv <8 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 672 for instruction: %v9f32 = fdiv <9 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fdiv float undef, undef
@@ -59,6 +69,8 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
   %v3f32 = fdiv <3 x float> undef, undef
   %v4f32 = fdiv <4 x float> undef, undef
   %v5f32 = fdiv <5 x float> undef, undef
+  %v8f32 = fdiv <8 x float> undef, undef
+  %v9f32 = fdiv <9 x float> undef, undef
   ret void
 }
 
@@ -68,7 +80,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'fdiv_f64'
@@ -76,7 +88,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'fdiv_f64'
@@ -84,7 +96,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'fdiv_f64'
@@ -92,7 +104,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'fdiv_f64'
@@ -100,7 +112,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'fdiv_f64'
@@ -108,7 +120,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'fdiv_f64'
@@ -116,7 +128,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'fdiv_f64'
@@ -124,7 +136,7 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fdiv double undef, undef
@@ -141,7 +153,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 476 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'fdiv_f16_f32ieee'
@@ -149,7 +163,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
@@ -157,7 +173,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 408 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
@@ -165,7 +183,9 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
@@ -173,6 +193,8 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
   %v3f16 = fdiv <3 x half> undef, undef
   %v4f16 = fdiv <4 x half> undef, undef
   %v5f16 = fdiv <5 x half> undef, undef
+  %v16f16 = fdiv <16 x half> undef, undef
+  %v17f16 = fdiv <17 x half> undef, undef
   ret void
 }
 
@@ -182,7 +204,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 544 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -190,7 +214,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -198,7 +224,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 476 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -206,7 +234,9 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
@@ -214,6 +244,8 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
   %v3f16 = fdiv <3 x half> undef, undef
   %v4f16 = fdiv <4 x half> undef, undef
   %v5f16 = fdiv <5 x half> undef, undef
+  %v16f16 = fdiv <16 x half> undef, undef
+  %v17f16 = fdiv <17 x half> undef, undef
   ret void
 }
 
@@ -223,7 +255,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -233,7 +265,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ieee'
@@ -241,7 +273,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -251,7 +283,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ieee'
@@ -259,7 +291,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -269,7 +301,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ieee'
@@ -277,7 +309,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -287,7 +319,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ieee'
@@ -295,7 +327,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -305,7 +337,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ieee'
@@ -313,7 +345,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -323,7 +355,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ieee'
@@ -331,7 +363,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -341,7 +373,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ieee'
@@ -349,7 +381,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -359,7 +391,7 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half 1.0, undef
@@ -396,7 +428,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ftzdaz'
@@ -414,7 +446,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ftzdaz'
@@ -432,7 +464,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ftzdaz'
@@ -450,7 +482,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ftzdaz'
@@ -458,7 +490,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -468,7 +500,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ftzdaz'
@@ -476,7 +508,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -486,7 +518,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ftzdaz'
@@ -494,7 +526,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -504,7 +536,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ftzdaz'
@@ -512,7 +544,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
@@ -522,7 +554,7 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half 1.0, undef

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
index 5eb4aa78b8c9e..a5906b070d58e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
@@ -14,6 +14,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; F32-LABEL: 'fmul_f32'
@@ -22,6 +24,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; F32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-SIZE-LABEL: 'fmul_f32'
@@ -30,6 +34,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fmul <9 x float> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fmul_f32'
@@ -38,6 +44,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fmul <9 x float> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fmul float undef, undef
@@ -45,6 +53,8 @@ define amdgpu_kernel void @fmul_f32() #0 {
   %v3f32 = fmul <3 x float> undef, undef
   %v4f32 = fmul <4 x float> undef, undef
   %v5f32 = fmul <5 x float> undef, undef
+  %v8f32 = fmul <8 x float> undef, undef
+  %v9f32 = fmul <9 x float> undef, undef
   ret void
 }
 
@@ -54,7 +64,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF64-LABEL: 'fmul_f64'
@@ -62,7 +72,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fmul_f64'
@@ -70,7 +80,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-SIZE-LABEL: 'fmul_f64'
@@ -78,7 +88,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fmul_f64'
@@ -86,7 +96,7 @@ define amdgpu_kernel void @fmul_f64() #0 {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fmul double undef, undef
@@ -103,7 +113,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fmul_f16'
@@ -111,7 +123,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-SIZE-LABEL: 'fmul_f16'
@@ -119,7 +133,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fmul_f16'
@@ -127,7 +143,9 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fmul half undef, undef
@@ -135,6 +153,8 @@ define amdgpu_kernel void @fmul_f16() #0 {
   %v3f16 = fmul <3 x half> undef, undef
   %v4f16 = fmul <4 x half> undef, undef
   %v5f16 = fmul <5 x half> undef, undef
+  %v16f16 = fmul <16 x half> undef, undef
+  %v17f16 = fmul <17 x half> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
index b4482906cf400..bf33cdc63553b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
@@ -10,6 +10,8 @@ define amdgpu_kernel void @fneg_f32() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = fneg <8 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = fneg <9 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fneg_f32'
@@ -18,6 +20,8 @@ define amdgpu_kernel void @fneg_f32() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8f32 = fneg <8 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v9f32 = fneg <9 x float> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fneg float undef
@@ -25,6 +29,8 @@ define amdgpu_kernel void @fneg_f32() {
   %v3f32 = fneg <3 x float> undef
   %v4f32 = fneg <4 x float> undef
   %v5f32 = fneg <5 x float> undef
+  %v8f32 = fneg <8 x float> undef
+  %v9f32 = fneg <9 x float> undef
   ret void
 }
 
@@ -59,7 +65,9 @@ define amdgpu_kernel void @fneg_f16() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = fneg <5 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = fneg <16 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = fneg <17 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fneg_f16'
@@ -67,7 +75,9 @@ define amdgpu_kernel void @fneg_f16() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = fneg <5 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16f16 = fneg <16 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17f16 = fneg <17 x half> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fneg half undef
@@ -75,5 +85,7 @@ define amdgpu_kernel void @fneg_f16() {
   %v3f16 = fneg <3 x half> undef
   %v4f16 = fneg <4 x half> undef
   %v5f16 = fneg <5 x half> undef
+  %v16f16 = fneg <16 x half> undef
+  %v17f16 = fneg <17 x half> undef
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
index a10ef8e16b022..38fec3cfe5349 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
@@ -14,6 +14,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOPACKEDF32-LABEL: 'fsub_f32'
@@ -22,6 +24,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'
@@ -30,6 +34,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'
@@ -38,6 +44,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fsub float undef, undef
@@ -45,6 +53,8 @@ define amdgpu_kernel void @fsub_f32() #0 {
   %v3f32 = fsub <3 x float> undef, undef
   %v4f32 = fsub <4 x float> undef, undef
   %v5f32 = fsub <5 x float> undef, undef
+  %v8f32 = fsub <8 x float> undef, undef
+  %v9f32 = fsub <9 x float> undef, undef
   ret void
 }
 
@@ -54,7 +64,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF64-LABEL: 'fsub_f64'
@@ -62,7 +72,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fsub_f64'
@@ -70,7 +80,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64'
@@ -78,7 +88,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f64'
@@ -86,7 +96,7 @@ define amdgpu_kernel void @fsub_f64() #0 {
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fsub double undef, undef
@@ -103,7 +113,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; FASTF16-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fsub_f16'
@@ -111,7 +123,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF16-SIZE-LABEL: 'fsub_f16'
@@ -119,7 +133,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fsub_f16'
@@ -127,7 +143,9 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
-; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fsub half undef, undef
@@ -135,5 +153,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
   %v3f16 = fsub <3 x half> undef, undef
   %v4f16 = fsub <4 x half> undef, undef
   %v5f16 = fsub <5 x half> undef, undef
+  %v16f16 = fsub <16 x half> undef, undef
+  %v17f16 = fsub <17 x half> undef, undef
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 3c5e3da71079e..824b7e1ee93d6 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -12,6 +12,8 @@ define amdgpu_kernel void @mul_i32() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i32 = mul <3 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i32 = mul <4 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i32 = mul <5 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i32 = mul <8 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v9i32 = mul <9 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'mul_i32'
@@ -20,6 +22,8 @@ define amdgpu_kernel void @mul_i32() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i32 = mul <3 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i32 = mul <4 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i32 = mul <5 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i32 = mul <8 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9i32 = mul <9 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i32 = mul i32 undef, undef
@@ -27,6 +31,8 @@ define amdgpu_kernel void @mul_i32() #0 {
   %v3i32 = mul <3 x i32> undef, undef
   %v4i32 = mul <4 x i32> undef, undef
   %v5i32 = mul <5 x i32> undef, undef
+  %v8i32 = mul <8 x i32> undef, undef
+  %v9i32 = mul <9 x i32> undef, undef
   ret void
 }
 
@@ -36,8 +42,7 @@ define amdgpu_kernel void @mul_i64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2i64 = mul <2 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v3i64 = mul <3 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = mul <4 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v5i64 = mul <4 x i64> undef, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v8i64 = mul <8 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v5i64 = mul <5 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'mul_i64'
@@ -45,16 +50,14 @@ define amdgpu_kernel void @mul_i64() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2i64 = mul <2 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3i64 = mul <3 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4i64 = mul <4 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = mul <4 x i64> undef, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = mul <8 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %v5i64 = mul <5 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i64 = mul i64 undef, undef
   %v2i64 = mul <2 x i64> undef, undef
   %v3i64 = mul <3 x i64> undef, undef
   %v4i64 = mul <4 x i64> undef, undef
-  %v5i64 = mul <4 x i64> undef, undef
-  %v8i64 = mul <8 x i64> undef, undef
+  %v5i64 = mul <5 x i64> undef, undef
   ret void
 }
 
@@ -64,7 +67,9 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3i16 = mul <3 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-LABEL: 'mul_i16'
@@ -72,7 +77,9 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'mul_i16'
@@ -80,7 +87,9 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'mul_i16'
@@ -88,7 +97,9 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = mul <3 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = mul <16 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = mul i16 undef, undef
@@ -96,6 +107,8 @@ define amdgpu_kernel void @mul_i16() #0 {
   %v3i16 = mul <3 x i16> undef, undef
   %v4i16 = mul <4 x i16> undef, undef
   %v5i16 = mul <5 x i16> undef, undef
+  %v16i16 = mul <16 x i16> undef, undef
+  %v17i16 = mul <17 x i16> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
index 3ab46ad8e14b3..c2d3ad4692207 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
@@ -26,7 +26,7 @@ define amdgpu_kernel void @shl() #0 {
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW64-LABEL: 'shl'
@@ -49,7 +49,7 @@ define amdgpu_kernel void @shl() #0 {
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = shl <2 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = shl <3 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST64-SIZE-LABEL: 'shl'
@@ -72,7 +72,7 @@ define amdgpu_kernel void @shl() #0 {
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW64-SIZE-LABEL: 'shl'
@@ -95,7 +95,7 @@ define amdgpu_kernel void @shl() #0 {
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8 = shl i8 undef, undef
@@ -142,7 +142,7 @@ define amdgpu_kernel void @lshr() #0 {
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW64-LABEL: 'lshr'
@@ -165,7 +165,7 @@ define amdgpu_kernel void @lshr() #0 {
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = lshr <2 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = lshr <3 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST64-SIZE-LABEL: 'lshr'
@@ -188,7 +188,7 @@ define amdgpu_kernel void @lshr() #0 {
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW64-SIZE-LABEL: 'lshr'
@@ -211,7 +211,7 @@ define amdgpu_kernel void @lshr() #0 {
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8 = lshr i8 undef, undef
@@ -258,7 +258,7 @@ define amdgpu_kernel void @ashr() #0 {
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW64-LABEL: 'ashr'
@@ -281,7 +281,7 @@ define amdgpu_kernel void @ashr() #0 {
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = ashr <2 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = ashr <3 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST64-SIZE-LABEL: 'ashr'
@@ -304,7 +304,7 @@ define amdgpu_kernel void @ashr() #0 {
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW64-SIZE-LABEL: 'ashr'
@@ -327,7 +327,7 @@ define amdgpu_kernel void @ashr() #0 {
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8 = ashr i8 undef, undef