[llvm] cf362ff - [NFC][AMDGPU] Improve cost model tests coverage.

Daniil Fukalov via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 30 08:14:48 PDT 2021


Author: Daniil Fukalov
Date: 2021-09-30T18:13:17+03:00
New Revision: cf362ff4cab3a3bef5f32f0445526823998eb72f

URL: https://github.com/llvm/llvm-project/commit/cf362ff4cab3a3bef5f32f0445526823998eb72f
DIFF: https://github.com/llvm/llvm-project/commit/cf362ff4cab3a3bef5f32f0445526823998eb72f.diff

LOG: [NFC][AMDGPU] Improve cost model tests coverage.

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
    llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
    llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
    llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
    llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
    llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
    llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
    llvm/test/Analysis/CostModel/AMDGPU/fma.ll
    llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
    llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
    llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
    llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
    llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
    llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll
    llvm/test/Analysis/CostModel/AMDGPU/mul.ll
    llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
    llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 930ebc89a9e31..7b70eb20132cf 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -12,6 +12,10 @@ define amdgpu_kernel void @add_i32() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'add_i32'
@@ -20,6 +24,10 @@ define amdgpu_kernel void @add_i32() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = add <3 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = add <4 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = add <5 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v6i32 = add <6 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v7i32 = add <7 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = add <8 x i32> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v32i32 = add <32 x i32> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i32 = add i32 undef, undef
@@ -27,6 +35,10 @@ define amdgpu_kernel void @add_i32() #0 {
   %v3i32 = add <3 x i32> undef, undef
   %v4i32 = add <4 x i32> undef, undef
   %v5i32 = add <5 x i32> undef, undef
+  %v6i32 = add <6 x i32> undef, undef
+  %v7i32 = add <7 x i32> undef, undef
+  %v8i32 = add <8 x i32> undef, undef
+  %v32i32 = add <32 x i32> undef, undef
   ret void
 }
 
@@ -36,6 +48,10 @@ define amdgpu_kernel void @add_i64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -44,6 +60,10 @@ define amdgpu_kernel void @add_i64() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = add <2 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = add <3 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = add <4 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = add <5 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v6i64 = add <6 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v7i64 = add <7 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v8i64 = add <8 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16i64 = add <16 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -51,6 +71,10 @@ define amdgpu_kernel void @add_i64() #0 {
   %v2i64 = add <2 x i64> undef, undef
   %v3i64 = add <3 x i64> undef, undef
   %v4i64 = add <4 x i64> undef, undef
+  %v5i64 = add <5 x i64> undef, undef
+  %v6i64 = add <6 x i64> undef, undef
+  %v7i64 = add <7 x i64> undef, undef
+  %v8i64 = add <8 x i64> undef, undef
   %v16i64 = add <16 x i64> undef, undef
   ret void
 }
@@ -59,61 +83,124 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-LABEL: 'add_i16'
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'add_i16'
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'add_i16'
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = add <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = add <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = add <4 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'add_i16'
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = add i16 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = add <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = add <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = add <4 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = add <5 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = add i16 undef, undef
   %v2i16 = add <2 x i16> undef, undef
+  %v3i16 = add <3 x i16> undef, undef
+  %v4i16 = add <4 x i16> undef, undef
+  %v5i16 = add <5 x i16> undef, undef
+  %v6i16 = add <6 x i16> undef, undef
+  ret void
+}
+
+define amdgpu_kernel void @add_i8() #0 {
+; ALL-LABEL: 'add_i8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'add_i8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = add i8 undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = add <2 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = add <3 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = add <4 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = add <5 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v6i8 = add <6 x i8> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %i8 = add i8 undef, undef
+  %v2i8 = add <2 x i8> undef, undef
+  %v3i8 = add <3 x i8> undef, undef
+  %v4i8 = add <4 x i8> undef, undef
+  %v5i8 = add <5 x i8> undef, undef
+  %v6i8 = add <6 x i8> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @sub() #0 {
 ; FAST16-LABEL: 'sub'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'sub'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'sub'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = sub <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = sub <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'sub'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = sub i8 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = sub i32 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = sub i64 undef, undef
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = sub i16 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = sub <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = sub <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = sub <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
+  %i8 = sub i8 undef, undef
+  %i16 = sub i16 undef, undef
   %i32 = sub i32 undef, undef
   %i64 = sub i64 undef, undef
-  %i16 = sub i16 undef, undef
   %v2i16 = sub <2 x i16> undef, undef
+  %v3i16 = sub <3 x i16> undef, undef
+  %v4i16 = sub <4 x i16> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
index 8d558e4366171..95b996bcda189 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
@@ -7,18 +7,69 @@ define void @addrspacecast_global_to_flat() #0 {
 ; ALL-LABEL: 'addrspacecast_global_to_flat'
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16 addrspace(1)* undef to i16*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32 addrspace(1)* undef to i32*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64 addrspace(1)* undef to i64*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'addrspacecast_global_to_flat'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16 addrspace(1)* undef to i16*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32 addrspace(1)* undef to i32*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64 addrspace(1)* undef to i64*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8ptr = addrspacecast i8 addrspace(1)* undef to i8*
   %v2i8ptr = addrspacecast <2 x i8 addrspace(1)*> undef to <2 x i8*>
+  %v3i8ptr = addrspacecast <3 x i8 addrspace(1)*> undef to <3 x i8*>
+  %v4i8ptr = addrspacecast <4 x i8 addrspace(1)*> undef to <4 x i8*>
   %v32i8ptr = addrspacecast <32 x i8 addrspace(1)*> undef to <32 x i8*>
+  %i16ptr = addrspacecast i16 addrspace(1)* undef to i16*
+  %v2i16ptr = addrspacecast <2 x i16 addrspace(1)*> undef to <2 x i16*>
+  %v3i16ptr = addrspacecast <3 x i16 addrspace(1)*> undef to <3 x i16*>
+  %v4i16ptr = addrspacecast <4 x i16 addrspace(1)*> undef to <4 x i16*>
+  %v32i16ptr = addrspacecast <32 x i16 addrspace(1)*> undef to <32 x i16*>
+  %i32ptr = addrspacecast i32 addrspace(1)* undef to i32*
+  %v2i32ptr = addrspacecast <2 x i32 addrspace(1)*> undef to <2 x i32*>
+  %v3i32ptr = addrspacecast <3 x i32 addrspace(1)*> undef to <3 x i32*>
+  %v4i32ptr = addrspacecast <4 x i32 addrspace(1)*> undef to <4 x i32*>
+  %v32i32ptr = addrspacecast <32 x i32 addrspace(1)*> undef to <32 x i32*>
+  %i64ptr = addrspacecast i64 addrspace(1)* undef to i64*
+  %v2i64ptr = addrspacecast <2 x i64 addrspace(1)*> undef to <2 x i64*>
+  %v3i64ptr = addrspacecast <3 x i64 addrspace(1)*> undef to <3 x i64*>
+  %v4i64ptr = addrspacecast <4 x i64 addrspace(1)*> undef to <4 x i64*>
+  %v32i64ptr = addrspacecast <32 x i64 addrspace(1)*> undef to <32 x i64*>
   ret void
 }
 
@@ -26,18 +77,69 @@ define void @addrspacecast_local_to_flat() #0 {
 ; ALL-LABEL: 'addrspacecast_local_to_flat'
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast i16 addrspace(3)* undef to i16*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast i32 addrspace(3)* undef to i32*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast i64 addrspace(3)* undef to i64*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'addrspacecast_local_to_flat'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16ptr = addrspacecast i16 addrspace(3)* undef to i16*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32ptr = addrspacecast i32 addrspace(3)* undef to i32*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i64ptr = addrspacecast i64 addrspace(3)* undef to i64*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8ptr = addrspacecast i8 addrspace(3)* undef to i8*
   %v2i8ptr = addrspacecast <2 x i8 addrspace(3)*> undef to <2 x i8*>
+  %v3i8ptr = addrspacecast <3 x i8 addrspace(3)*> undef to <3 x i8*>
+  %v4i8ptr = addrspacecast <4 x i8 addrspace(3)*> undef to <4 x i8*>
   %v32i8ptr = addrspacecast <32 x i8 addrspace(3)*> undef to <32 x i8*>
+  %i16ptr = addrspacecast i16 addrspace(3)* undef to i16*
+  %v2i16ptr = addrspacecast <2 x i16 addrspace(3)*> undef to <2 x i16*>
+  %v3i16ptr = addrspacecast <3 x i16 addrspace(3)*> undef to <3 x i16*>
+  %v4i16ptr = addrspacecast <4 x i16 addrspace(3)*> undef to <4 x i16*>
+  %v32i16ptr = addrspacecast <32 x i16 addrspace(3)*> undef to <32 x i16*>
+  %i32ptr = addrspacecast i32 addrspace(3)* undef to i32*
+  %v2i32ptr = addrspacecast <2 x i32 addrspace(3)*> undef to <2 x i32*>
+  %v3i32ptr = addrspacecast <3 x i32 addrspace(3)*> undef to <3 x i32*>
+  %v4i32ptr = addrspacecast <4 x i32 addrspace(3)*> undef to <4 x i32*>
+  %v32i32ptr = addrspacecast <32 x i32 addrspace(3)*> undef to <32 x i32*>
+  %i64ptr = addrspacecast i64 addrspace(3)* undef to i64*
+  %v2i64ptr = addrspacecast <2 x i64 addrspace(3)*> undef to <2 x i64*>
+  %v3i64ptr = addrspacecast <3 x i64 addrspace(3)*> undef to <3 x i64*>
+  %v4i64ptr = addrspacecast <4 x i64 addrspace(3)*> undef to <4 x i64*>
+  %v32i64ptr = addrspacecast <32 x i64 addrspace(3)*> undef to <32 x i64*>
   ret void
 }
 
@@ -45,18 +147,69 @@ define void @addrspacecast_flat_to_local() #0 {
 ; ALL-LABEL: 'addrspacecast_flat_to_local'
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16* undef to i16 addrspace(3)*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32* undef to i32 addrspace(3)*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64* undef to i64 addrspace(3)*
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'addrspacecast_flat_to_local'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i16ptr = addrspacecast i16* undef to i16 addrspace(3)*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i32ptr = addrspacecast i32* undef to i32 addrspace(3)*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %i64ptr = addrspacecast i64* undef to i64 addrspace(3)*
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i8ptr = addrspacecast i8* undef to i8 addrspace(3)*
   %v2i8ptr = addrspacecast <2 x i8*> undef to <2 x i8 addrspace(3)*>
+  %v3i8ptr = addrspacecast <3 x i8*> undef to <3 x i8 addrspace(3)*>
+  %v4i8ptr = addrspacecast <4 x i8*> undef to <4 x i8 addrspace(3)*>
   %v32i8ptr = addrspacecast <32 x i8*> undef to <32 x i8 addrspace(3)*>
+  %i16ptr = addrspacecast i16* undef to i16 addrspace(3)*
+  %v2i16ptr = addrspacecast <2 x i16*> undef to <2 x i16 addrspace(3)*>
+  %v3i16ptr = addrspacecast <3 x i16*> undef to <3 x i16 addrspace(3)*>
+  %v4i16ptr = addrspacecast <4 x i16*> undef to <4 x i16 addrspace(3)*>
+  %v32i16ptr = addrspacecast <32 x i16*> undef to <32 x i16 addrspace(3)*>
+  %i32ptr = addrspacecast i32* undef to i32 addrspace(3)*
+  %v2i32ptr = addrspacecast <2 x i32*> undef to <2 x i32 addrspace(3)*>
+  %v3i32ptr = addrspacecast <3 x i32*> undef to <3 x i32 addrspace(3)*>
+  %v4i32ptr = addrspacecast <4 x i32*> undef to <4 x i32 addrspace(3)*>
+  %v32i32ptr = addrspacecast <32 x i32*> undef to <32 x i32 addrspace(3)*>
+  %i64ptr = addrspacecast i64* undef to i64 addrspace(3)*
+  %v2i64ptr = addrspacecast <2 x i64*> undef to <2 x i64 addrspace(3)*>
+  %v3i64ptr = addrspacecast <3 x i64*> undef to <3 x i64 addrspace(3)*>
+  %v4i64ptr = addrspacecast <4 x i64*> undef to <4 x i64 addrspace(3)*>
+  %v32i64ptr = addrspacecast <32 x i64*> undef to <32 x i64 addrspace(3)*>
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
index 042b43bc52652..e57e6d0a38c35 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll
@@ -7,94 +7,289 @@
 
 define amdgpu_kernel void @or() #0 {
 ; SLOW16-LABEL: 'or'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = or <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = or <4 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-LABEL: 'or'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = or <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = or <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'or'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = or <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = or <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = or <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'or'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = or i8 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = or <2 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = or <3 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = or <4 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = or i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = or <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = or <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = or i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = or <2 x i32> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = or <3 x i32> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = or <4 x i32> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = or i64 undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = or <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = or <2 x i64> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = or <3 x i64> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = or <4 x i64> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
+  %i8 = or i8 undef, undef
+  %v2i8 = or <2 x i8> undef, undef
+  %v3i8 = or <3 x i8> undef, undef
+  %v4i8 = or <4 x i8> undef, undef
+  %i16 = or i16 undef, undef
+  %v2i16 = or <2 x i16> undef, undef
+  %v3i16 = or <3 x i16> undef, undef
+  %v4i16 = or <4 x i16> undef, undef
   %i32 = or i32 undef, undef
+  %v2i32 = or <2 x i32> undef, undef
+  %v3i32 = or <3 x i32> undef, undef
+  %v4i32 = or <4 x i32> undef, undef
   %i64 = or i64 undef, undef
-  %v2i16 = or <2 x i16> undef, undef
+  %v2i64 = or <2 x i64> undef, undef
+  %v3i64 = or <3 x i64> undef, undef
+  %v4i64 = or <4 x i64> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @xor() #0 {
 ; SLOW16-LABEL: 'xor'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = xor <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = xor <4 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-LABEL: 'xor'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = xor <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = xor <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'xor'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = xor <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = xor <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'xor'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = xor i8 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = xor <2 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = xor <3 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = xor <4 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = xor i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = xor <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = xor <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = xor i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = xor <2 x i32> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = xor <3 x i32> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = xor <4 x i32> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = xor i64 undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = xor <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = xor <2 x i64> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = xor <3 x i64> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = xor <4 x i64> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
+  %i8 = xor i8 undef, undef
+  %v2i8 = xor <2 x i8> undef, undef
+  %v3i8 = xor <3 x i8> undef, undef
+  %v4i8 = xor <4 x i8> undef, undef
+  %i16 = xor i16 undef, undef
+  %v2i16 = xor <2 x i16> undef, undef
+  %v3i16 = xor <3 x i16> undef, undef
+  %v4i16 = xor <4 x i16> undef, undef
   %i32 = xor i32 undef, undef
+  %v2i32 = xor <2 x i32> undef, undef
+  %v3i32 = xor <3 x i32> undef, undef
+  %v4i32 = xor <4 x i32> undef, undef
   %i64 = xor i64 undef, undef
-  %v2i16 = xor <2 x i16> undef, undef
+  %v2i64 = xor <2 x i64> undef, undef
+  %v3i64 = xor <3 x i64> undef, undef
+  %v4i64 = xor <4 x i64> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @and() #0 {
 ; SLOW16-LABEL: 'and'
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = and <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = and <4 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-LABEL: 'and'
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = and <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = and <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'and'
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = and <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = and <4 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = and <2 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'and'
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = and i8 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = and <2 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = and <3 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = and <4 x i8> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = and i16 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = and <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = and <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = and i32 undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = and <2 x i32> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = and <3 x i32> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = and <4 x i32> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = and i64 undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = and <2 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = and <2 x i64> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = and <3 x i64> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = and <4 x i64> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
+  %i8 = and i8 undef, undef
+  %v2i8 = and <2 x i8> undef, undef
+  %v3i8 = and <3 x i8> undef, undef
+  %v4i8 = and <4 x i8> undef, undef
+  %i16 = and i16 undef, undef
+  %v2i16 = and <2 x i16> undef, undef
+  %v3i16 = and <3 x i16> undef, undef
+  %v4i16 = and <4 x i16> undef, undef
   %i32 = and i32 undef, undef
+  %v2i32 = and <2 x i32> undef, undef
+  %v3i32 = and <3 x i32> undef, undef
+  %v4i32 = and <4 x i32> undef, undef
   %i64 = and i64 undef, undef
-  %v2i16 = and <2 x i16> undef, undef
+  %v2i64 = and <2 x i64> undef, undef
+  %v3i64 = and <3 x i64> undef, undef
+  %v4i64 = and <4 x i64> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
index f907148ff6f83..0843430344ecc 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll
@@ -9,97 +9,364 @@
 
 define amdgpu_kernel void @extractelement_32(i32 %arg) {
 ; GCN-LABEL: 'extractelement_32'
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = extractelement <3 x i32> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = extractelement <5 x i32> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = extractelement <3 x i32> undef, i32 1
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = extractelement <4 x i32> undef, i32 1
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = extractelement <5 x i32> undef, i32 1
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_1 = extractelement <8 x i32> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_2 = extractelement <3 x i32> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_2 = extractelement <4 x i32> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_2 = extractelement <5 x i32> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_2 = extractelement <8 x i32> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_3 = extractelement <5 x i32> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GCN-SIZE-LABEL: 'extractelement_32'
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = extractelement <2 x float> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = extractelement <3 x i32> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = extractelement <5 x i32> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = extractelement <2 x float> undef, i32 1
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = extractelement <3 x i32> undef, i32 1
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = extractelement <4 x i32> undef, i32 1
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = extractelement <5 x i32> undef, i32 1
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_1 = extractelement <8 x i32> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_2 = extractelement <3 x i32> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_2 = extractelement <4 x i32> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_2 = extractelement <5 x i32> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_2 = extractelement <8 x i32> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_3 = extractelement <5 x i32> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
+  %v2i32_0 = extractelement <2 x i32> undef, i32 0
+  %v2f32_0 = extractelement <2 x float> undef, i32 0
+  %v3i32_0 = extractelement <3 x i32> undef, i32 0
+  %v4i32_0 = extractelement <4 x i32> undef, i32 0
+  %v5i32_0 = extractelement <5 x i32> undef, i32 0
+  %v8i32_0 = extractelement <8 x i32> undef, i32 0
+
   %v2i32_1 = extractelement <2 x i32> undef, i32 1
   %v2f32_1 = extractelement <2 x float> undef, i32 1
   %v3i32_1 = extractelement <3 x i32> undef, i32 1
   %v4i32_1 = extractelement <4 x i32> undef, i32 1
   %v5i32_1 = extractelement <5 x i32> undef, i32 1
   %v8i32_1 = extractelement <8 x i32> undef, i32 1
+
+  %v3i32_2 = extractelement <3 x i32> undef, i32 2
+  %v4i32_2 = extractelement <4 x i32> undef, i32 2
+  %v5i32_2 = extractelement <5 x i32> undef, i32 2
+  %v8i32_2 = extractelement <8 x i32> undef, i32 2
+
+  %v4i32_3 = extractelement <4 x i32> undef, i32 3
+  %v5i32_3 = extractelement <5 x i32> undef, i32 3
+  %v8i32_3 = extractelement <8 x i32> undef, i32 3
+
+  %v2i32_a = extractelement <2 x i32> undef, i32 %arg
+  %v4i32_a = extractelement <4 x i32> undef, i32 %arg
   %v8i32_a = extractelement <8 x i32> undef, i32 %arg
   ret void
 }
 
-define amdgpu_kernel void @extractelement_64() {
+define amdgpu_kernel void @extractelement_64(i32 %arg) {
 ; GCN-LABEL: 'extractelement_64'
-; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i64 1
-; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i64 1
-; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i64 1
-; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i64 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = extractelement <3 x i64> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = extractelement <5 x i64> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = extractelement <5 x i64> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_2 = extractelement <3 x i64> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_2 = extractelement <4 x i64> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_2 = extractelement <5 x i64> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_2 = extractelement <8 x i64> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_3 = extractelement <5 x i64> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; GCN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GCN-SIZE-LABEL: 'extractelement_64'
-; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i64 1
-; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i64 1
-; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i64 1
-; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i64 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = extractelement <2 x double> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = extractelement <3 x i64> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = extractelement <5 x i64> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = extractelement <2 x double> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = extractelement <3 x i64> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = extractelement <4 x i64> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = extractelement <5 x i64> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_1 = extractelement <8 x i64> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_2 = extractelement <3 x i64> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_2 = extractelement <4 x i64> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_2 = extractelement <5 x i64> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_2 = extractelement <8 x i64> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_3 = extractelement <5 x i64> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %v2i64_1 = extractelement <2 x i64> undef, i64 1
-  %v3i64_1 = extractelement <3 x i64> undef, i64 1
-  %v4i64_1 = extractelement <4 x i64> undef, i64 1
-  %v8i64_1 = extractelement <8 x i64> undef, i64 1
+  %v2i64_0 = extractelement <2 x i64> undef, i32 0
+  %v2f64_0 = extractelement <2 x double> undef, i32 0
+  %v3i64_0 = extractelement <3 x i64> undef, i32 0
+  %v4i64_0 = extractelement <4 x i64> undef, i32 0
+  %v5i64_0 = extractelement <5 x i64> undef, i32 0
+  %v8i64_0 = extractelement <8 x i64> undef, i32 0
+
+  %v2i64_1 = extractelement <2 x i64> undef, i32 1
+  %v2f64_1 = extractelement <2 x double> undef, i32 1
+  %v3i64_1 = extractelement <3 x i64> undef, i32 1
+  %v4i64_1 = extractelement <4 x i64> undef, i32 1
+  %v5i64_1 = extractelement <5 x i64> undef, i32 1
+  %v8i64_1 = extractelement <8 x i64> undef, i32 1
+
+  %v3i64_2 = extractelement <3 x i64> undef, i32 2
+  %v4i64_2 = extractelement <4 x i64> undef, i32 2
+  %v5i64_2 = extractelement <5 x i64> undef, i32 2
+  %v8i64_2 = extractelement <8 x i64> undef, i32 2
+
+  %v4i64_3 = extractelement <4 x i64> undef, i32 3
+  %v5i64_3 = extractelement <5 x i64> undef, i32 3
+  %v8i64_3 = extractelement <8 x i64> undef, i32 3
+
+  %v2i64_a = extractelement <2 x i64> undef, i32 %arg
+  %v4i64_a = extractelement <4 x i64> undef, i32 %arg
+  %v8i64_a = extractelement <8 x i64> undef, i32 %arg
   ret void
 }
 
-define amdgpu_kernel void @extractelement_8() {
+define amdgpu_kernel void @extractelement_8(i32 %arg) {
 ; GCN-LABEL: 'extractelement_8'
-; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i8 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = extractelement <3 x i8> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = extractelement <5 x i8> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = extractelement <3 x i8> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = extractelement <5 x i8> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_2 = extractelement <3 x i8> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = extractelement <4 x i8> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_2 = extractelement <5 x i8> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = extractelement <8 x i8> undef, i32 2
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_3 = extractelement <5 x i8> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = extractelement <8 x i8> undef, i32 3
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; GCN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; GCN-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GCN-SIZE-LABEL: 'extractelement_8'
-; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i8 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = extractelement <2 x i8> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = extractelement <3 x i8> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = extractelement <4 x i8> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = extractelement <5 x i8> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = extractelement <8 x i8> undef, i32 0
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = extractelement <2 x i8> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = extractelement <3 x i8> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = extractelement <4 x i8> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = extractelement <5 x i8> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = extractelement <8 x i8> undef, i32 1
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_2 = extractelement <3 x i8> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_2 = extractelement <4 x i8> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_2 = extractelement <5 x i8> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = extractelement <8 x i8> undef, i32 2
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = extractelement <4 x i8> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_3 = extractelement <5 x i8> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = extractelement <8 x i8> undef, i32 3
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = extractelement <8 x i8> undef, i32 %arg
 ; GCN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %v4i8_1 = extractelement <4 x i8> undef, i8 1
+  %v2i8_0 = extractelement <2 x i8> undef, i32 0
+  %v3i8_0 = extractelement <3 x i8> undef, i32 0
+  %v4i8_0 = extractelement <4 x i8> undef, i32 0
+  %v5i8_0 = extractelement <5 x i8> undef, i32 0
+  %v8i8_0 = extractelement <8 x i8> undef, i32 0
+
+  %v2i8_1 = extractelement <2 x i8> undef, i32 1
+  %v3i8_1 = extractelement <3 x i8> undef, i32 1
+  %v4i8_1 = extractelement <4 x i8> undef, i32 1
+  %v5i8_1 = extractelement <5 x i8> undef, i32 1
+  %v8i8_1 = extractelement <8 x i8> undef, i32 1
+
+  %v3i8_2 = extractelement <3 x i8> undef, i32 2
+  %v4i8_2 = extractelement <4 x i8> undef, i32 2
+  %v5i8_2 = extractelement <5 x i8> undef, i32 2
+  %v8i8_2 = extractelement <8 x i8> undef, i32 2
+
+  %v4i8_3 = extractelement <4 x i8> undef, i32 3
+  %v5i8_3 = extractelement <5 x i8> undef, i32 3
+  %v8i8_3 = extractelement <8 x i8> undef, i32 3
+
+  %v2i8_a = extractelement <2 x i8> undef, i32 %arg
+  %v4i8_a = extractelement <4 x i8> undef, i32 %arg
+  %v8i8_a = extractelement <8 x i8> undef, i32 %arg
   ret void
 }
 
 define amdgpu_kernel void @extractelement_16(i32 %arg) {
 ; CI-LABEL: 'extractelement_16'
-; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
-; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3
 ; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; CI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX89-LABEL: 'extractelement_16'
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3
 ; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; GFX89-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'extractelement_16'
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GFX89-SIZE-LABEL: 'extractelement_16'
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i16 0
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i16 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = extractelement <2 x i16> undef, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16_0 = extractelement <2 x half> undef, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = extractelement <3 x i16> undef, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = extractelement <4 x i16> undef, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = extractelement <5 x i16> undef, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = extractelement <2 x i16> undef, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_1 = extractelement <2 x half> undef, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = extractelement <3 x i16> undef, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = extractelement <4 x i16> undef, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = extractelement <5 x i16> undef, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = extractelement <8 x i16> undef, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_2 = extractelement <3 x i16> undef, i32 2
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = extractelement <4 x i16> undef, i32 2
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_2 = extractelement <5 x i16> undef, i32 2
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = extractelement <8 x i16> undef, i32 2
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = extractelement <4 x i16> undef, i32 3
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_3 = extractelement <5 x i16> undef, i32 3
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = extractelement <8 x i16> undef, i32 3
 ; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg
 ; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %v2i16_0 = extractelement <2 x i16> undef, i16 0
-  %v2i16_1 = extractelement <2 x i16> undef, i16 1
+  %v2i16_0 = extractelement <2 x i16> undef, i32 0
+  %v2f16_0 = extractelement <2 x half> undef, i32 0
+  %v3i16_0 = extractelement <3 x i16> undef, i32 0
+  %v4i16_0 = extractelement <4 x i16> undef, i32 0
+  %v5i16_0 = extractelement <5 x i16> undef, i32 0
+  %v8i16_0 = extractelement <8 x i16> undef, i32 0
+
+  %v2i16_1 = extractelement <2 x i16> undef, i32 1
+  %v2f16_1 = extractelement <2 x half> undef, i32 1
+  %v3i16_1 = extractelement <3 x i16> undef, i32 1
+  %v4i16_1 = extractelement <4 x i16> undef, i32 1
+  %v5i16_1 = extractelement <5 x i16> undef, i32 1
+  %v8i16_1 = extractelement <8 x i16> undef, i32 1
+
+  %v3i16_2 = extractelement <3 x i16> undef, i32 2
+  %v4i16_2 = extractelement <4 x i16> undef, i32 2
+  %v5i16_2 = extractelement <5 x i16> undef, i32 2
+  %v8i16_2 = extractelement <8 x i16> undef, i32 2
+
+  %v4i16_3 = extractelement <4 x i16> undef, i32 3
+  %v5i16_3 = extractelement <5 x i16> undef, i32 3
+  %v8i16_3 = extractelement <8 x i16> undef, i32 3
+
   %v2i16_a = extractelement <2 x i16> undef, i32 %arg
+  %v4i16_a = extractelement <4 x i16> undef, i32 %arg
+  %v8i16_a = extractelement <8 x i16> undef, i32 %arg
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
index 7475a4d1bef50..4a01b1bd677c1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll
@@ -8,6 +8,7 @@ define amdgpu_kernel void @fabs_f32() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.fabs.f32(float undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -15,12 +16,14 @@ define amdgpu_kernel void @fabs_f32() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = call float @llvm.fabs.f32(float undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = call float @llvm.fabs.f32(float undef) #1
   %v2f32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef) #1
   %v3f32 = call <3 x float> @llvm.fabs.v3f32(<3 x float> undef) #1
+  %v4f32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) #1
   %v5f32 = call <5 x float> @llvm.fabs.v5f32(<5 x float> undef) #1
   ret void
 }
@@ -30,17 +33,20 @@ define amdgpu_kernel void @fabs_f64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.fabs.f64(double undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fabs_f64'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = call double @llvm.fabs.f64(double undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = call double @llvm.fabs.f64(double undef) #1
   %v2f64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef) #1
   %v3f64 = call <3 x double> @llvm.fabs.v3f64(<3 x double> undef) #1
+  %v4f64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef) #1
   ret void
 }
 
@@ -49,32 +55,42 @@ define amdgpu_kernel void @fabs_f16() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.fabs.f16(half undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; ALL-SIZE-LABEL: 'fabs_f16'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = call half @llvm.fabs.f16(half undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #2
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #2
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = call half @llvm.fabs.f16(half undef) #1
   %v2f16 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef) #1
   %v3f16 = call <3 x half> @llvm.fabs.v3f16(<3 x half> undef) #1
+  %v4f16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef) #1
+  %v5f16 = call <5 x half> @llvm.fabs.v5f16(<5 x half> undef) #1
   ret void
 }
 
 declare float @llvm.fabs.f32(float) #1
 declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1
 declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
 declare <5 x float> @llvm.fabs.v5f32(<5 x float>) #1
 
 declare double @llvm.fabs.f64(double) #1
 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1
 declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #1
 
 declare half @llvm.fabs.f16(half) #1
 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
 declare <3 x half> @llvm.fabs.v3f16(<3 x half>) #1
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
+declare <5 x half> @llvm.fabs.v5f16(<5 x half>) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
index a733f4adba117..2decec00badd3 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
@@ -12,6 +12,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -19,6 +20,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -26,6 +28,7 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fadd <2 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fadd <5 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -33,12 +36,14 @@ define amdgpu_kernel void @fadd_f32() #0 {
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fadd float undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fadd <2 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fadd <3 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fadd <4 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fadd <5 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fadd float undef, undef
   %v2f32 = fadd <2 x float> undef, undef
   %v3f32 = fadd <3 x float> undef, undef
+  %v4f32 = fadd <4 x float> undef, undef
   %v5f32 = fadd <5 x float> undef, undef
   ret void
 }
@@ -48,35 +53,47 @@ define amdgpu_kernel void @fadd_f64() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fadd double undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF64-LABEL: 'fadd_f64'
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fadd double undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fadd_f64'
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fadd double undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fadd <4 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-FASTF64-SIZE-LABEL: 'fadd_f64'
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fadd double undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fadd <4 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; NOPACKEDF32-SIZE-LABEL: 'fadd_f64'
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fadd double undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fadd <2 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fadd <3 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fadd <4 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fadd <5 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fadd double undef, undef
   %v2f64 = fadd <2 x double> undef, undef
   %v3f64 = fadd <3 x double> undef, undef
+  %v4f64 = fadd <4 x double> undef, undef
+  %v5f64 = fadd <5 x double> undef, undef
   ret void
 }
 
@@ -86,6 +103,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fadd_f16'
@@ -93,6 +111,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF16-SIZE-LABEL: 'fadd_f16'
@@ -100,6 +119,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fadd_f16'
@@ -107,12 +127,14 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fadd <2 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fadd <3 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fadd <4 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fadd half undef, undef
   %v2f16 = fadd <2 x half> undef, undef
   %v3f16 = fadd <3 x half> undef, undef
   %v4f16 = fadd <4 x half> undef, undef
+  %v5f16 = fadd <5 x half> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index fc641bba8a2b0..ff300dd209cd9 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -17,6 +17,7 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -24,12 +25,14 @@ define amdgpu_kernel void @fdiv_f32_ieee() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fdiv float undef, undef
   %v2f32 = fdiv <2 x float> undef, undef
   %v3f32 = fdiv <3 x float> undef, undef
+  %v4f32 = fdiv <4 x float> undef, undef
   %v5f32 = fdiv <5 x float> undef, undef
   ret void
 }
@@ -39,6 +42,7 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f32 = fdiv float undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2f32 = fdiv <2 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v5f32 = fdiv <5 x float> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -46,12 +50,14 @@ define amdgpu_kernel void @fdiv_f32_ftzdaz() #1 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fdiv float undef, undef
   %v2f32 = fdiv <2 x float> undef, undef
   %v3f32 = fdiv <3 x float> undef, undef
+  %v4f32 = fdiv <4 x float> undef, undef
   %v5f32 = fdiv <5 x float> undef, undef
   ret void
 }
@@ -61,53 +67,71 @@ define amdgpu_kernel void @fdiv_f64() #0 {
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double undef, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'fdiv_f64'
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double undef, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'fdiv_f64'
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double undef, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'fdiv_f64'
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double undef, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'fdiv_f64'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'fdiv_f64'
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double undef, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'fdiv_f64'
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double undef, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'fdiv_f64'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fdiv double undef, undef
   %v2f64 = fdiv <2 x double> undef, undef
   %v3f64 = fdiv <3 x double> undef, undef
+  %v4f64 = fdiv <4 x double> undef, undef
+  %v5f64 = fdiv <5 x double> undef, undef
   ret void
 }
 
@@ -115,30 +139,40 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; NOFP16-LABEL: 'fdiv_f16_f32ieee'
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'fdiv_f16_f32ieee'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f16 = fdiv half undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
   %v2f16 = fdiv <2 x half> undef, undef
+  %v3f16 = fdiv <3 x half> undef, undef
   %v4f16 = fdiv <4 x half> undef, undef
+  %v5f16 = fdiv <5 x half> undef, undef
   ret void
 }
 
@@ -146,125 +180,366 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; NOFP16-LABEL: 'fdiv_f16_f32ftzdaz'
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f16 = fdiv half undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; NOFP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'fdiv_f16_f32ftzdaz'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; NOFP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f16 = fdiv half undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = fdiv <2 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v3f16 = fdiv <3 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
   %v2f16 = fdiv <2 x half> undef, undef
+  %v3f16 = fdiv <3 x half> undef, undef
   %v4f16 = fdiv <4 x half> undef, undef
+  %v5f16 = fdiv <5 x half> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @rcp_ieee() #0 {
 ; CIFASTF64-LABEL: 'rcp_ieee'
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ieee'
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ieee'
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ieee'
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ieee'
-; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ieee'
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ieee'
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ieee'
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %f32 = fdiv float 1.0, undef
   %f16 = fdiv half 1.0, undef
-  %f64 = fdiv double 1.0, undef
-  %v2f32 = fdiv <2 x float> <float 1.0, float 1.0>, undef
   %v2f16 = fdiv <2 x half> <half 1.0, half 1.0>, undef
+  %v3f16 = fdiv <3 x half> <half 1.0, half 1.0, half 1.0>, undef
+  %v4f16 = fdiv <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, undef
+  %v5f16 = fdiv <5 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, undef
+  %f32 = fdiv float 1.0, undef
+  %v2f32 = fdiv <2 x float> <float 1.0, float 1.0>, undef
+  %v3f32 = fdiv <3 x float> <float 1.0, float 1.0, float 1.0>, undef
+  %v4f32 = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, undef
+  %v5f32 = fdiv <5 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, undef
+  %f64 = fdiv double 1.0, undef
+  %v2f64 = fdiv <2 x double> <double 1.0, double 1.0>, undef
+  %v3f64 = fdiv <3 x double> <double 1.0, double 1.0, double 1.0>, undef
+  %v4f64 = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, undef
+  %v5f64 = fdiv <5 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, undef
   ret void
 }
 
 define amdgpu_kernel void @rcp_ftzdaz() #1 {
-; ALL-LABEL: 'rcp_ftzdaz'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+; CIFASTF64-LABEL: 'rcp_ftzdaz'
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; ALL-SIZE-LABEL: 'rcp_ftzdaz'
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; CISLOWF64-LABEL: 'rcp_ftzdaz'
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SIFASTF64-LABEL: 'rcp_ftzdaz'
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 135 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; SISLOWF64-LABEL: 'rcp_ftzdaz'
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 205 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FP16-LABEL: 'rcp_ftzdaz'
+; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; CI-SIZE-LABEL: 'rcp_ftzdaz'
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SI-SIZE-LABEL: 'rcp_ftzdaz'
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 125 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FP16-SIZE-LABEL: 'rcp_ftzdaz'
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %f32 = fdiv float 1.0, undef
   %f16 = fdiv half 1.0, undef
-  %v2f32 = fdiv <2 x float> <float 1.0, float 1.0>, undef
   %v2f16 = fdiv <2 x half> <half 1.0, half 1.0>, undef
+  %v3f16 = fdiv <3 x half> <half 1.0, half 1.0, half 1.0>, undef
+  %v4f16 = fdiv <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>, undef
+  %v5f16 = fdiv <5 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, undef
+  %f32 = fdiv float 1.0, undef
+  %v2f32 = fdiv <2 x float> <float 1.0, float 1.0>, undef
+  %v3f32 = fdiv <3 x float> <float 1.0, float 1.0, float 1.0>, undef
+  %v4f32 = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, undef
+  %v5f32 = fdiv <5 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, undef
+  %f64 = fdiv double 1.0, undef
+  %v2f64 = fdiv <2 x double> <double 1.0, double 1.0>, undef
+  %v3f64 = fdiv <3 x double> <double 1.0, double 1.0, double 1.0>, undef
+  %v4f64 = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, undef
+  %v5f64 = fdiv <5 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
index d9e4e453abf05..55ea14537c9cc 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
@@ -12,6 +12,7 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -19,6 +20,7 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -26,6 +28,7 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -33,6 +36,7 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -40,12 +44,14 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #1
   %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #1
   %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #1
+  %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #1
   %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #1
   ret void
 }
@@ -55,35 +61,47 @@ define amdgpu_kernel void @fma_f64() #0 {
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2
+; GFX90A-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2
 ; GFX90A-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX900-LABEL: 'fma_f64'
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2
+; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2
 ; GFX900-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fma_f64'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-SIZE-LABEL: 'fma_f64'
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fma_f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #2
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #2
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = call double @llvm.fma.f64(double undef, double undef, double undef) #1
   %v2f64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) #1
   %v3f64 = call <3 x double> @llvm.fma.v3f64(<3 x double> undef, <3 x double> undef, <3 x double> undef) #1
+  %v4f64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) #1
+  %v5f64 = call <5 x double> @llvm.fma.v5f64(<5 x double> undef, <5 x double> undef, <5 x double> undef) #1
   ret void
 }
 
@@ -92,44 +110,59 @@ define amdgpu_kernel void @fma_f16() #0 {
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fma_f16'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-SIZE-LABEL: 'fma_f16'
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fma_f16'
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #2
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #2
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #2
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = call half @llvm.fma.f16(half undef, half undef, half undef) #1
   %v2f16 = call <2 x half> @llvm.fma.v2f16(<2 x half> undef, <2 x half> undef, <2 x half> undef) #1
   %v3f16 = call <3 x half> @llvm.fma.v3f16(<3 x half> undef, <3 x half> undef, <3 x half> undef) #1
+  %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #1
+  %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #1
   ret void
 }
 
 declare float @llvm.fma.f32(float, float, float) #1
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
 declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
 declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>) #1
 
 declare double @llvm.fma.f64(double, double, double) #1
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1
 declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>) #1
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1
+declare <5 x double> @llvm.fma.v5f64(<5 x double>, <5 x double>, <5 x double>) #1
 
 declare half @llvm.fma.f16(half, half, half) #1
 declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
 declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #1
+declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
+declare <5 x half> @llvm.fma.v5f16(<5 x half>, <5 x half>, <5 x half>) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
index 82e8f0fb90a78..5eb4aa78b8c9e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
@@ -12,6 +12,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -19,6 +20,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; F32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; F32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
 ; F32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -26,6 +28,7 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -33,12 +36,14 @@ define amdgpu_kernel void @fmul_f32() #0 {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fmul float undef, undef
   %v2f32 = fmul <2 x float> undef, undef
   %v3f32 = fmul <3 x float> undef, undef
+  %v4f32 = fmul <4 x float> undef, undef
   %v5f32 = fmul <5 x float> undef, undef
   ret void
 }
@@ -48,35 +53,47 @@ define amdgpu_kernel void @fmul_f64() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF64-LABEL: 'fmul_f64'
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fmul_f64'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fmul double undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fmul <4 x double> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-SIZE-LABEL: 'fmul_f64'
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
+; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; GFX90A-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fmul_f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fmul <5 x double> undef, undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fmul double undef, undef
   %v2f64 = fmul <2 x double> undef, undef
   %v3f64 = fmul <3 x double> undef, undef
+  %v4f64 = fmul <4 x double> undef, undef
+  %v5f64 = fmul <5 x double> undef, undef
   ret void
 }
 
@@ -86,6 +103,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fmul_f16'
@@ -93,6 +111,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-SIZE-LABEL: 'fmul_f16'
@@ -100,6 +119,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fmul_f16'
@@ -107,12 +127,14 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
+; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fmul half undef, undef
   %v2f16 = fmul <2 x half> undef, undef
   %v3f16 = fmul <3 x half> undef, undef
   %v4f16 = fmul <4 x half> undef, undef
+  %v5f16 = fmul <5 x half> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
index 970c8d9d2886a..b4482906cf400 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
@@ -8,6 +8,7 @@ define amdgpu_kernel void @fneg_f32() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = fneg float undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fneg <2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -15,12 +16,14 @@ define amdgpu_kernel void @fneg_f32() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32 = fneg float undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32 = fneg <2 x float> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f32 = fneg <3 x float> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32 = fneg <4 x float> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f32 = fneg <5 x float> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fneg float undef
   %v2f32 = fneg <2 x float> undef
   %v3f32 = fneg <3 x float> undef
+  %v4f32 = fneg <4 x float> undef
   %v5f32 = fneg <5 x float> undef
   ret void
 }
@@ -30,17 +33,23 @@ define amdgpu_kernel void @fneg_f64() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = fneg double undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fneg <2 x double> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = fneg <3 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = fneg <4 x double> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = fneg <5 x double> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fneg_f64'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64 = fneg double undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64 = fneg <2 x double> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f64 = fneg <3 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f64 = fneg <4 x double> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5f64 = fneg <5 x double> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fneg double undef
   %v2f64 = fneg <2 x double> undef
   %v3f64 = fneg <3 x double> undef
+  %v4f64 = fneg <4 x double> undef
+  %v5f64 = fneg <5 x double> undef
   ret void
 }
 
@@ -49,16 +58,22 @@ define amdgpu_kernel void @fneg_f16() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = fneg half undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fneg_f16'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16 = fneg half undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16 = fneg <2 x half> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3f16 = fneg <3 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f16 = fneg <4 x half> undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fneg <5 x half> undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fneg half undef
   %v2f16 = fneg <2 x half> undef
   %v3f16 = fneg <3 x half> undef
+  %v4f16 = fneg <4 x half> undef
+  %v5f16 = fneg <5 x half> undef
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
index 2340b377e67d6..a10ef8e16b022 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
@@ -12,6 +12,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -19,6 +20,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
+; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -26,6 +28,7 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -33,12 +36,14 @@ define amdgpu_kernel void @fsub_f32() #0 {
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fsub float undef, undef
   %v2f32 = fsub <2 x float> undef, undef
   %v3f32 = fsub <3 x float> undef, undef
+  %v4f32 = fsub <4 x float> undef, undef
   %v5f32 = fsub <5 x float> undef, undef
   ret void
 }
@@ -48,35 +53,47 @@ define amdgpu_kernel void @fsub_f64() #0 {
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
+; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF64-LABEL: 'fsub_f64'
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fsub_f64'
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fsub double undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64'
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
+; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f64'
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
+; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f64 = fsub <5 x double> undef, undef
 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fsub double undef, undef
   %v2f64 = fsub <2 x double> undef, undef
   %v3f64 = fsub <3 x double> undef, undef
+  %v4f64 = fsub <4 x double> undef, undef
+  %v5f64 = fsub <5 x double> undef, undef
   ret void
 }
 
@@ -86,6 +103,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fsub_f16'
@@ -93,6 +111,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FASTF16-SIZE-LABEL: 'fsub_f16'
@@ -100,6 +119,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fsub_f16'
@@ -107,11 +127,13 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fsub half undef, undef
   %v2f16 = fsub <2 x half> undef, undef
   %v3f16 = fsub <3 x half> undef, undef
   %v4f16 = fsub <4 x half> undef, undef
+  %v5f16 = fsub <5 x half> undef, undef
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
index 50fccb9a2f2a3..86db73085396d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
@@ -22,6 +22,8 @@ define void @fmul_fadd_f32() #0 {
 ; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
 ; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32_2 = fmul float undef, undef
 ; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef
+; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef
 ; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
 ; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
 ; SLOWF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
@@ -35,6 +37,8 @@ define void @fmul_fadd_f32() #0 {
 ; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
 ; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32_2 = fmul float undef, undef
 ; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef
+; FASTF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef
 ; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
 ; FASTF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
 ; FASTF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
@@ -48,6 +52,8 @@ define void @fmul_fadd_f32() #0 {
 ; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
 ; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32_2 = fmul float undef, undef
 ; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef
+; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef
 ; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
 ; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
 ; SLOWF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
@@ -61,18 +67,27 @@ define void @fmul_fadd_f32() #0 {
 ; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32add = fadd <2 x float> %v2f32, undef
 ; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32_2 = fmul float undef, undef
 ; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32sub = fsub float %f32_2, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f32c_2 = fmul contract float undef, undef
+; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32csub = fsub contract float %f32c_2, undef
 ; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32_2 = fmul <2 x float> undef, undef
 ; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32sub = fsub <2 x float> %v2f32_2, undef
 ; FASTF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f32 = fmul float undef, undef
   %f32add = fadd float %f32, undef
+
   %f32c = fmul contract float undef, undef
   %f32cadd = fadd contract float %f32c, undef
+
   %v2f32 = fmul <2 x float> undef, undef
   %v2f32add = fadd <2 x float> %v2f32, undef
+
   %f32_2 = fmul float undef, undef
   %f32sub = fsub float %f32_2, undef
+
+  %f32c_2 = fmul contract float undef, undef
+  %f32csub = fsub contract float %f32c_2, undef
+
   %v2f32_2 = fmul <2 x float> undef, undef
   %v2f32sub = fsub <2 x float> %v2f32_2, undef
   ret void
@@ -88,6 +103,8 @@ define void @fmul_fadd_f16() #0 {
 ; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
 ; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16_2 = fmul half undef, undef
 ; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef
+; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef
 ; FUSED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
 ; FUSED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
 ; FUSED-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
@@ -101,6 +118,8 @@ define void @fmul_fadd_f16() #0 {
 ; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
 ; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16_2 = fmul half undef, undef
 ; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef
+; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef
 ; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
 ; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
 ; GFX9SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
@@ -114,6 +133,8 @@ define void @fmul_fadd_f16() #0 {
 ; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
 ; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16_2 = fmul half undef, undef
 ; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef
+; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef
 ; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
 ; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
 ; FUSED-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
@@ -127,18 +148,27 @@ define void @fmul_fadd_f16() #0 {
 ; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16add = fadd <2 x half> %v2f16, undef
 ; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16_2 = fmul half undef, undef
 ; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16sub = fsub half %f16_2, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f16c_2 = fmul contract half undef, undef
+; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f15csub = fsub contract half %f16c_2, undef
 ; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16_2 = fmul <2 x half> undef, undef
 ; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16sub = fsub <2 x half> %v2f16_2, undef
 ; GFX9SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fmul half undef, undef
   %f16add = fadd half %f16, undef
+
   %f16c = fmul contract half undef, undef
   %f15cadd = fadd contract half %f16c, undef
+
   %v2f16 = fmul <2 x half> undef, undef
   %v2f16add = fadd <2 x half> %v2f16, undef
+
   %f16_2 = fmul half undef, undef
   %f16sub = fsub half %f16_2, undef
+
+  %f16c_2 = fmul contract half undef, undef
+  %f15csub = fsub contract half %f16c_2, undef
+
   %v2f16_2 = fmul <2 x half> undef, undef
   %v2f16sub = fsub <2 x half> %v2f16_2, undef
   ret void
@@ -154,6 +184,8 @@ define void @fmul_fadd_f64() #0 {
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64_2 = fmul double undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64sub = fsub double %f64_2, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract double undef, undef
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64csub = fsub contract double %f64c_2, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
@@ -167,6 +199,8 @@ define void @fmul_fadd_f64() #0 {
 ; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
 ; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64_2 = fmul double undef, undef
 ; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64sub = fsub double %f64_2, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract double undef, undef
+; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64csub = fsub contract double %f64c_2, undef
 ; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
 ; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
 ; GFX9FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
@@ -180,6 +214,8 @@ define void @fmul_fadd_f64() #0 {
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64_2 = fmul double undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64sub = fsub double %f64_2, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract double undef, undef
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64csub = fsub contract double %f64c_2, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
@@ -193,18 +229,27 @@ define void @fmul_fadd_f64() #0 {
 ; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64add = fadd <2 x double> %v2f64, undef
 ; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64_2 = fmul double undef, undef
 ; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64sub = fsub double %f64_2, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f64c_2 = fmul contract double undef, undef
+; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64csub = fsub contract double %f64c_2, undef
 ; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = fmul <2 x double> undef, undef
 ; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64sub = fsub <2 x double> %v2f64_2, undef
 ; GFX9FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f64 = fmul double undef, undef
   %f64add = fadd double %f64, undef
+
   %f64c = fmul contract double undef, undef
   %f64cadd = fadd contract double %f64c, undef
+
   %v2f64 = fmul <2 x double> undef, undef
   %v2f64add = fadd <2 x double> %v2f64, undef
+
   %f64_2 = fmul double undef, undef
   %f64sub = fsub double %f64_2, undef
+
+  %f64c_2 = fmul contract double undef, undef
+  %f64csub = fsub contract double %f64c_2, undef
+
   %v2f64_2 = fmul <2 x double> undef, undef
   %v2f64sub = fsub <2 x double> %v2f64_2, undef
   ret void

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
index e0f64201b52a5..1bdbb5b189205 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll
@@ -1,49 +1,222 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=CI %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GFX89 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX89 %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=CI-SIZE %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GFX89-SIZE %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX89-SIZE %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=ALL,CI %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=ALL,GFX89 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=ALL,GFX89 %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=ALL-SIZE,CI-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=ALL-SIZE,GFX89-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=ALL-SIZE,GFX89-SIZE %s
 ; END.
 
-define amdgpu_kernel void @insertelement_v2() {
-; CI-LABEL: 'insertelement_v2'
-; CI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
-; CI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
-; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
-; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
-; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+define amdgpu_kernel void @insertelement_i8(i32 %arg) {
+; ALL-LABEL: 'insertelement_i8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = insertelement <3 x i8> undef, i8 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = insertelement <5 x i8> undef, i8 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = insertelement <3 x i8> undef, i8 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = insertelement <5 x i8> undef, i8 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_a = insertelement <3 x i8> undef, i8 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_a = insertelement <5 x i8> undef, i8 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'insertelement_i8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_0 = insertelement <3 x i8> undef, i8 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_0 = insertelement <5 x i8> undef, i8 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_1 = insertelement <3 x i8> undef, i8 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_1 = insertelement <5 x i8> undef, i8 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> undef, i8 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i8_a = insertelement <3 x i8> undef, i8 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> undef, i8 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i8_a = insertelement <5 x i8> undef, i8 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %v2i8_0 = insertelement <2 x i8> undef, i8 42, i32 0
+  %v3i8_0 = insertelement <3 x i8> undef, i8 42, i32 0
+  %v4i8_0 = insertelement <4 x i8> undef, i8 42, i32 0
+  %v5i8_0 = insertelement <5 x i8> undef, i8 42, i32 0
+  %v2i8_1 = insertelement <2 x i8> undef, i8 42, i32 1
+  %v3i8_1 = insertelement <3 x i8> undef, i8 42, i32 1
+  %v4i8_1 = insertelement <4 x i8> undef, i8 42, i32 1
+  %v5i8_1 = insertelement <5 x i8> undef, i8 42, i32 1
+  %v2i8_a = insertelement <2 x i8> undef, i8 42, i32 %arg
+  %v3i8_a = insertelement <3 x i8> undef, i8 42, i32 %arg
+  %v4i8_a = insertelement <4 x i8> undef, i8 42, i32 %arg
+  %v5i8_a = insertelement <5 x i8> undef, i8 42, i32 %arg
+  ret void
+}
+
+define amdgpu_kernel void @insertelement_i16(i32 %arg) {
+; CI-LABEL: 'insertelement_i16'
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg
+; CI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg
 ; CI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; GFX89-LABEL: 'insertelement_v2'
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
-; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; GFX89-LABEL: 'insertelement_i16'
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg
+; GFX89-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg
 ; GFX89-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; CI-SIZE-LABEL: 'insertelement_v2'
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; CI-SIZE-LABEL: 'insertelement_i16'
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; GFX89-SIZE-LABEL: 'insertelement_v2'
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
-; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+; GFX89-SIZE-LABEL: 'insertelement_i16'
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg
+; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg
 ; GFX89-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %v2i32_1 = insertelement <2 x i32> undef, i32 123, i32 1
-  %v2i64_1 = insertelement <2 x i64> undef, i64 123, i64 1
-  %v2i16_0 = insertelement <2 x i16> undef, i16 123, i16 0
-  %v2i16_1 = insertelement <2 x i16> undef, i16 123, i16 1
-  %v2i8_1 = insertelement <2 x i8> undef, i8 123, i8 1
+  %v2i16_0 = insertelement <2 x i16> undef, i16 42, i32 0
+  %v3i16_0 = insertelement <3 x i16> undef, i16 42, i32 0
+  %v4i16_0 = insertelement <4 x i16> undef, i16 42, i32 0
+  %v5i16_0 = insertelement <5 x i16> undef, i16 42, i32 0
+  %v2i16_1 = insertelement <2 x i16> undef, i16 42, i32 1
+  %v3i16_1 = insertelement <3 x i16> undef, i16 42, i32 1
+  %v4i16_1 = insertelement <4 x i16> undef, i16 42, i32 1
+  %v5i16_1 = insertelement <5 x i16> undef, i16 42, i32 1
+  %v2i16_a = insertelement <2 x i16> undef, i16 42, i32 %arg
+  %v3i16_a = insertelement <3 x i16> undef, i16 42, i32 %arg
+  %v4i16_a = insertelement <4 x i16> undef, i16 42, i32 %arg
+  %v5i16_a = insertelement <5 x i16> undef, i16 42, i32 %arg
+  ret void
+}
+
+define amdgpu_kernel void @insertelement_i32(i32 %arg) {
+; ALL-LABEL: 'insertelement_i32'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = insertelement <3 x i32> undef, i32 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = insertelement <5 x i32> undef, i32 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = insertelement <3 x i32> undef, i32 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = insertelement <4 x i32> undef, i32 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = insertelement <5 x i32> undef, i32 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i32_a = insertelement <3 x i32> undef, i32 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5i32_a = insertelement <5 x i32> undef, i32 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'insertelement_i32'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> undef, i32 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_0 = insertelement <3 x i32> undef, i32 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> undef, i32 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_0 = insertelement <5 x i32> undef, i32 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> undef, i32 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i32_1 = insertelement <3 x i32> undef, i32 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_1 = insertelement <4 x i32> undef, i32 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i32_1 = insertelement <5 x i32> undef, i32 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> undef, i32 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i32_a = insertelement <3 x i32> undef, i32 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> undef, i32 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5i32_a = insertelement <5 x i32> undef, i32 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %v2i32_0 = insertelement <2 x i32> undef, i32 42, i32 0
+  %v3i32_0 = insertelement <3 x i32> undef, i32 42, i32 0
+  %v4i32_0 = insertelement <4 x i32> undef, i32 42, i32 0
+  %v5i32_0 = insertelement <5 x i32> undef, i32 42, i32 0
+  %v2i32_1 = insertelement <2 x i32> undef, i32 42, i32 1
+  %v3i32_1 = insertelement <3 x i32> undef, i32 42, i32 1
+  %v4i32_1 = insertelement <4 x i32> undef, i32 42, i32 1
+  %v5i32_1 = insertelement <5 x i32> undef, i32 42, i32 1
+  %v2i32_a = insertelement <2 x i32> undef, i32 42, i32 %arg
+  %v3i32_a = insertelement <3 x i32> undef, i32 42, i32 %arg
+  %v4i32_a = insertelement <4 x i32> undef, i32 42, i32 %arg
+  %v5i32_a = insertelement <5 x i32> undef, i32 42, i32 %arg
+  ret void
+}
+
+define amdgpu_kernel void @insertelement_i64(i32 %arg) {
+; ALL-LABEL: 'insertelement_i64'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = insertelement <3 x i64> undef, i64 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = insertelement <5 x i64> undef, i64 42, i32 0
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = insertelement <3 x i64> undef, i64 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = insertelement <4 x i64> undef, i64 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = insertelement <5 x i64> undef, i64 42, i32 1
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i64_a = insertelement <3 x i64> undef, i64 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'insertelement_i64'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> undef, i64 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_0 = insertelement <3 x i64> undef, i64 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> undef, i64 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_0 = insertelement <5 x i64> undef, i64 42, i32 0
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> undef, i64 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v3i64_1 = insertelement <3 x i64> undef, i64 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_1 = insertelement <4 x i64> undef, i64 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v5i64_1 = insertelement <5 x i64> undef, i64 42, i32 1
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> undef, i64 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i64_a = insertelement <3 x i64> undef, i64 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> undef, i64 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %v2i64_0 = insertelement <2 x i64> undef, i64 42, i32 0
+  %v3i64_0 = insertelement <3 x i64> undef, i64 42, i32 0
+  %v4i64_0 = insertelement <4 x i64> undef, i64 42, i32 0
+  %v5i64_0 = insertelement <5 x i64> undef, i64 42, i32 0
+  %v2i64_1 = insertelement <2 x i64> undef, i64 42, i32 1
+  %v3i64_1 = insertelement <3 x i64> undef, i64 42, i32 1
+  %v4i64_1 = insertelement <4 x i64> undef, i64 42, i32 1
+  %v5i64_1 = insertelement <5 x i64> undef, i64 42, i32 1
+  %v2i64_a = insertelement <2 x i64> undef, i64 42, i32 %arg
+  %v3i64_a = insertelement <3 x i64> undef, i64 42, i32 %arg
+  %v4i64_a = insertelement <4 x i64> undef, i64 42, i32 %arg
+  %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll
index 0eedf867502eb..c838dc7a70b55 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll
@@ -3,8 +3,9 @@
 ; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s --check-prefix=CHECK-SIZE
 ; END.
 
+; Logical and/or - select's cost must be equivalent to that of binop
+
 define amdgpu_kernel void @op() {
-  ; Logical and/or - select's cost must be equivalent to that of binop
 ; CHECK-THROUGHPUT-LABEL: 'op'
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select i1 undef, i1 undef, i1 false
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and i1 undef, undef
@@ -29,23 +30,35 @@ define amdgpu_kernel void @op() {
 
 define void @vecop() {
 ; CHECK-THROUGHPUT-LABEL: 'vecop'
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CHECK-SIZE-LABEL: 'vecop'
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> <i1 false, i1 false, i1 false, i1 false>
-  %band = and <4 x i1> undef, undef
-  %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
-  %bor = or <4 x i1> undef, undef
+  %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> <i1 false, i1 false>
+  %v2band = and <2 x i1> undef, undef
+  %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef
+  %v2bor = or <2 x i1> undef, undef
+  %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> <i1 false, i1 false, i1 false, i1 false>
+  %v4band = and <4 x i1> undef, undef
+  %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+  %v4bor = or <4 x i1> undef, undef
 
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 6d702aed3cf43..3c5e3da71079e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -36,6 +36,7 @@ define amdgpu_kernel void @mul_i64() #0 {
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v2i64 = mul <2 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v3i64 = mul <3 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = mul <4 x i64> undef, undef
+; ALL-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v5i64 = mul <4 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v8i64 = mul <8 x i64> undef, undef
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
@@ -44,6 +45,7 @@ define amdgpu_kernel void @mul_i64() #0 {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2i64 = mul <2 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3i64 = mul <3 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4i64 = mul <4 x i64> undef, undef
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5i64 = mul <4 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v8i64 = mul <8 x i64> undef, undef
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -51,6 +53,7 @@ define amdgpu_kernel void @mul_i64() #0 {
   %v2i64 = mul <2 x i64> undef, undef
   %v3i64 = mul <3 x i64> undef, undef
   %v4i64 = mul <4 x i64> undef, undef
+  %v5i64 = mul <4 x i64> undef, undef
   %v8i64 = mul <8 x i64> undef, undef
   ret void
 }
@@ -60,29 +63,39 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = mul i16 undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i16 = mul <4 x i16> undef, undef
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST16-LABEL: 'mul_i16'
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i16 = mul i16 undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'mul_i16'
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = mul i16 undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FAST16-SIZE-LABEL: 'mul_i16'
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i16 = mul i16 undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = mul <2 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = mul <3 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = mul i16 undef, undef
   %v2i16 = mul <2 x i16> undef, undef
   %v3i16 = mul <3 x i16> undef, undef
+  %v4i16 = mul <4 x i16> undef, undef
+  %v5i16 = mul <5 x i16> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
index f2a6a7902097f..3ab46ad8e14b3 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shifts.ll
@@ -7,109 +7,349 @@
 
 define amdgpu_kernel void @shl() #0 {
 ; FAST64-LABEL: 'shl'
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = shl <3 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shl <4 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = shl <5 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW64-LABEL: 'shl'
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = shl i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = shl <3 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shl <4 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = shl <5 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = shl i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = shl <2 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = shl <3 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = shl <4 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST64-SIZE-LABEL: 'shl'
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = shl <3 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shl <4 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = shl <5 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW64-SIZE-LABEL: 'shl'
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = shl i8 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = shl <2 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = shl <3 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = shl <4 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = shl <5 x i8> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = shl i16 undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = shl <2 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = shl <3 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = shl <4 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = shl <5 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = shl i32 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = shl <2 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = shl <3 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = shl <4 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = shl <5 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = shl i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = shl <2 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = shl <3 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = shl <4 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = shl <5 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %i32 = shl i32 undef, undef
-  %i64 = shl i64 undef, undef
+  %i8 = shl i8 undef, undef
+  %v2i8 = shl <2 x i8> undef, undef
+  %v3i8 = shl <3 x i8> undef, undef
+  %v4i8 = shl <4 x i8> undef, undef
+  %v5i8 = shl <5 x i8> undef, undef
   %i16 = shl i16 undef, undef
   %v2i16 = shl <2 x i16> undef, undef
+  %v3i16 = shl <3 x i16> undef, undef
+  %v4i16 = shl <4 x i16> undef, undef
+  %v5i16 = shl <5 x i16> undef, undef
+  %i32 = shl i32 undef, undef
+  %v2i32 = shl <2 x i32> undef, undef
+  %v3i32 = shl <3 x i32> undef, undef
+  %v4i32 = shl <4 x i32> undef, undef
+  %v5i32 = shl <5 x i32> undef, undef
+  %i64 = shl i64 undef, undef
+  %v2i64 = shl <2 x i64> undef, undef
+  %v3i64 = shl <3 x i64> undef, undef
+  %v4i64 = shl <4 x i64> undef, undef
+  %v5i64 = shl <5 x i64> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @lshr() #0 {
 ; FAST64-LABEL: 'lshr'
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = lshr <3 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = lshr <4 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = lshr <5 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW64-LABEL: 'lshr'
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = lshr i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = lshr <3 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = lshr <4 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = lshr <5 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = lshr i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = lshr <2 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = lshr <3 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = lshr <4 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST64-SIZE-LABEL: 'lshr'
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = lshr <3 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = lshr <4 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = lshr <5 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW64-SIZE-LABEL: 'lshr'
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = lshr i8 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = lshr <2 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = lshr <3 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = lshr <4 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = lshr <5 x i8> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = lshr i16 undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = lshr <2 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = lshr <3 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = lshr <4 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = lshr <5 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = lshr i32 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = lshr <2 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = lshr <3 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = lshr <4 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = lshr <5 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = lshr i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = lshr <2 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = lshr <3 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = lshr <4 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = lshr <5 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %i32 = lshr i32 undef, undef
-  %i64 = lshr i64 undef, undef
+  %i8 = lshr i8 undef, undef
+  %v2i8 = lshr <2 x i8> undef, undef
+  %v3i8 = lshr <3 x i8> undef, undef
+  %v4i8 = lshr <4 x i8> undef, undef
+  %v5i8 = lshr <5 x i8> undef, undef
   %i16 = lshr i16 undef, undef
   %v2i16 = lshr <2 x i16> undef, undef
+  %v3i16 = lshr <3 x i16> undef, undef
+  %v4i16 = lshr <4 x i16> undef, undef
+  %v5i16 = lshr <5 x i16> undef, undef
+  %i32 = lshr i32 undef, undef
+  %v2i32 = lshr <2 x i32> undef, undef
+  %v3i32 = lshr <3 x i32> undef, undef
+  %v4i32 = lshr <4 x i32> undef, undef
+  %v5i32 = lshr <5 x i32> undef, undef
+  %i64 = lshr i64 undef, undef
+  %v2i64 = lshr <2 x i64> undef, undef
+  %v3i64 = lshr <3 x i64> undef, undef
+  %v4i64 = lshr <4 x i64> undef, undef
+  %v5i64 = lshr <5 x i64> undef, undef
   ret void
 }
 
 define amdgpu_kernel void @ashr() #0 {
 ; FAST64-LABEL: 'ashr'
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
-; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = ashr <3 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = ashr <4 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = ashr <5 x i16> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
+; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; FAST64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW64-LABEL: 'ashr'
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
-; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = ashr i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = ashr <3 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = ashr <4 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = ashr <5 x i16> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %i64 = ashr i64 undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2i64 = ashr <2 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3i64 = ashr <3 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4i64 = ashr <4 x i64> undef, undef
+; SLOW64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; SLOW64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FAST64-SIZE-LABEL: 'ashr'
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
-; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3i16 = ashr <3 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = ashr <4 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = ashr <5 x i16> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
+; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; FAST64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW64-SIZE-LABEL: 'ashr'
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
-; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i8 = ashr i8 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i8 = ashr <2 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i8 = ashr <3 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i8 = ashr <4 x i8> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i8 = ashr <5 x i8> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i16 = ashr i16 undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16 = ashr <2 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3i16 = ashr <3 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = ashr <4 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = ashr <5 x i16> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %i32 = ashr i32 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = ashr <2 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3i32 = ashr <3 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = ashr <4 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5i32 = ashr <5 x i32> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %i64 = ashr i64 undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = ashr <2 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3i64 = ashr <3 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = ashr <4 x i64> undef, undef
+; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5i64 = ashr <5 x i64> undef, undef
 ; SLOW64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %i32 = ashr i32 undef, undef
-  %i64 = ashr i64 undef, undef
+  %i8 = ashr i8 undef, undef
+  %v2i8 = ashr <2 x i8> undef, undef
+  %v3i8 = ashr <3 x i8> undef, undef
+  %v4i8 = ashr <4 x i8> undef, undef
+  %v5i8 = ashr <5 x i8> undef, undef
   %i16 = ashr i16 undef, undef
   %v2i16 = ashr <2 x i16> undef, undef
+  %v3i16 = ashr <3 x i16> undef, undef
+  %v4i16 = ashr <4 x i16> undef, undef
+  %v5i16 = ashr <5 x i16> undef, undef
+  %i32 = ashr i32 undef, undef
+  %v2i32 = ashr <2 x i32> undef, undef
+  %v3i32 = ashr <3 x i32> undef, undef
+  %v4i32 = ashr <4 x i32> undef, undef
+  %v5i32 = ashr <5 x i32> undef, undef
+  %i64 = ashr i64 undef, undef
+  %v2i64 = ashr <2 x i64> undef, undef
+  %v3i64 = ashr <3 x i64> undef, undef
+  %v4i64 = ashr <4 x i64> undef, undef
+  %v5i64 = ashr <5 x i64> undef, undef
   ret void
 }
 

diff  --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
index 17c21affbf9a6..4e5b8683919b1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
@@ -7,143 +7,549 @@
 ; RUN: opt < %s -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -cost-kind=code-size -S | FileCheck -check-prefixes=ALL-SIZE,VI-SIZE %s
 ; END.
 
-define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> %vec0, <2 x i16> %vec1) {
-; GFX9-10-LABEL: 'shufflevector_00_v2i16'
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+define amdgpu_kernel void @shufflevector_i16() {
+; GFX9-10-LABEL: 'shufflevector_i16'
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 3>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 3>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 3>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; VI-LABEL: 'shufflevector_00_v2i16'
-; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+; VI-LABEL: 'shufflevector_i16'
+; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 3>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 3>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 3>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; GFX9-10-SIZE-LABEL: 'shufflevector_00_v2i16'
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+; GFX9-10-SIZE-LABEL: 'shufflevector_i16'
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 3>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 3>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 3>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-; VI-SIZE-LABEL: 'shufflevector_00_v2i16'
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+; VI-SIZE-LABEL: 'shufflevector_i16'
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 3>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 3>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 3>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> zeroinitializer
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %shuf00 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> zeroinitializer
-  %shuf01 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
-  %shuf10 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-  %shuf11 = shufflevector <2 x i16> %vec0, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
-  %shuf02 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 0, i32 2>
-  %shuf20 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 0>
-  %shuf22 = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> <i32 2, i32 2>
+  %shuf00 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer
+  %shuf01 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %shuf10 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %shuf11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+  %shuf02 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 2>
+  %shuf20 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 0>
+  %shuf22 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 2>
+  %shuf03 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 3>
+  %shuf30 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 0>
+  %shuf33 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 3>
+  %shuf12 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 2>
+  %shuf21 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 1>
+  %shuf13 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 3>
+  %shuf31 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 1>
+  %shuf23 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 2, i32 3>
+  %shuf32 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 3, i32 2>
+  %shuf000 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 0>
+  %shuf001 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 1>
+  %shuf010 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 0>
+  %shuf011 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 1, i32 1>
+  %shuf100 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 0>
+  %shuf101 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 0, i32 1>
+  %shuf110 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 0>
+  %shuf111 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 1>
+  %shuf002 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 0, i32 2>
+  %shuf020 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 0>
+  %shuf022 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 0, i32 2, i32 2>
+  %shuf200 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 0>
+  %shuf202 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 0, i32 2>
+  %shuf220 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 0>
+  %shuf222 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 2>
+  %shuf112 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 1, i32 2>
+  %shuf121 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 1>
+  %shuf122 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 1, i32 2, i32 2>
+  %shuf211 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 1>
+  %shuf212 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 1, i32 2>
+  %shuf221 = shufflevector <2 x i16> undef, <2 x i16> undef, <3 x i32> <i32 2, i32 2, i32 1>
   ret void
 }
 
 ; Should not assert
-define amdgpu_kernel void @shufflevector_xxx(<2 x i8> %vec0) {
-; ALL-LABEL: 'shufflevector_xxx'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+define amdgpu_kernel void @shufflevector_i8() {
+; ALL-LABEL: 'shufflevector_i8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; ALL-SIZE-LABEL: 'shufflevector_xxx'
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; ALL-SIZE-LABEL: 'shufflevector_i8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf10 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf11 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf02 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shuf20 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shuf22 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf03 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf30 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf33 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf12 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf21 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf13 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> zeroinitializer
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 1>
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %shuf01uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %shuf10uu = shufflevector <2 x i8> %vec0, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuf00 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer
+  %shuf01 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %shuf10 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+  %shuf11 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 1>
+  %shuf02 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 2>
+  %shuf20 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 0>
+  %shuf22 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 2>
+  %shuf03 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 3>
+  %shuf30 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 0>
+  %shuf33 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 3>
+  %shuf12 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 2>
+  %shuf21 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 1>
+  %shuf13 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 3>
+  %shuf31 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 1>
+  %shuf23 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 2, i32 3>
+  %shuf32 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 3, i32 2>
+  %shuf000 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 0>
+  %shuf001 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 1>
+  %shuf010 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 0>
+  %shuf011 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 1>
+  %shuf100 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 0, i32 0>
+  %shuf101 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 0, i32 1>
+  %shuf110 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 0>
+  %shuf111 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 1>
+  %shuf002 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 0, i32 2>
+  %shuf020 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 2, i32 0>
+  %shuf022 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 0, i32 2, i32 2>
+  %shuf200 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 0, i32 0>
+  %shuf202 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 0, i32 2>
+  %shuf220 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 0>
+  %shuf222 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 2>
+  %shuf112 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 1, i32 2>
+  %shuf121 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 2, i32 1>
+  %shuf122 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 1, i32 2, i32 2>
+  %shuf211 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 1, i32 1>
+  %shuf212 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 1, i32 2>
+  %shuf221 = shufflevector <2 x i8> undef, <2 x i8> undef, <3 x i32> <i32 2, i32 2, i32 1>
+  ret void
+}
+
+define amdgpu_kernel void @shufflevector_i32() {
+; ALL-LABEL: 'shufflevector_i32'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf02 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf20 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf30 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf12 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf13 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 3>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; ALL-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 1>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; ALL-SIZE-LABEL: 'shufflevector_i32'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf01 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf10 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf11 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf02 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf20 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf22 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf03 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf30 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf33 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf12 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf21 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf13 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 3>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> zeroinitializer
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 2, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 0, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 0, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 0>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 2, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 2, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 1, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 1, i32 2>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 1>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %shuf00 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer
+  %shuf01 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %shuf10 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  %shuf11 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %shuf02 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 2>
+  %shuf20 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 0>
+  %shuf22 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 2>
+  %shuf03 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 3>
+  %shuf30 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 0>
+  %shuf33 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 3>
+  %shuf12 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 2>
+  %shuf21 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 1>
+  %shuf13 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 3>
+  %shuf31 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 1>
+  %shuf23 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %shuf32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 3, i32 2>
+  %shuf000 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 0>
+  %shuf001 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 1>
+  %shuf010 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 0>
+  %shuf011 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 1, i32 1>
+  %shuf100 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 0>
+  %shuf101 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 0, i32 1>
+  %shuf110 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 0>
+  %shuf111 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 1>
+  %shuf002 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 0, i32 2>
+  %shuf020 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 2, i32 0>
+  %shuf022 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 0, i32 2, i32 2>
+  %shuf200 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 0, i32 0>
+  %shuf202 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 0, i32 2>
+  %shuf220 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 0>
+  %shuf222 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 2>
+  %shuf112 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 1, i32 2>
+  %shuf121 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 2, i32 1>
+  %shuf122 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 1, i32 2, i32 2>
+  %shuf211 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 1, i32 1>
+  %shuf212 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 1, i32 2>
+  %shuf221 = shufflevector <2 x i32> undef, <2 x i32> undef, <3 x i32> <i32 2, i32 2, i32 1>
   ret void
 }
 
 ; Other shuffle cases
 define void @shuffle() {
 ; GFX9-10-LABEL: 'shuffle'
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
 ; GFX9-10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; VI-LABEL: 'shuffle'
-; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of -1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; VI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
 ; VI-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; GFX9-10-SIZE-LABEL: 'shuffle'
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
 ; GFX9-10-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; VI-SIZE-LABEL: 'shuffle'
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
 ; VI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
-  %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
-  %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-  %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-  %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-  %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
-  %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-  %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
-  %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
-  %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
-  %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
-  %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v2i8_2 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 0>
+  %v2i8_4 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v4i8_4 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v2i8_8 = shufflevector <2 x i8> undef, <2 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v4i8_8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v6i8_8 = shufflevector <6 x i8> undef, <6 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v8i8_8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v16i8_16 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v2i16_2 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %v4i16_4 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v8i16_8 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
+  %v2i32_2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+  %v4i32_4 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v2f32_2 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  %v4f32_4 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
+  %v2i64_2 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+  %v2f64_2 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   ret void
 }
 


        


More information about the llvm-commits mailing list